From a89814659d38ee333c0d4fe24eef6cd56a088cd3 Mon Sep 17 00:00:00 2001 From: norases Date: Tue, 29 Jul 2014 15:50:01 -0700 Subject: [PATCH 1/6] Added Epsilon-First code and updated the view. --- moe/bandit/constant.py | 5 +- moe/bandit/epsilon_first.py | 128 +++++++++++++++++++++++++ moe/bandit/linkers.py | 7 +- moe/tests/bandit/epsilon_first_test.py | 73 ++++++++++++++ moe/views/rest/bandit_epsilon.py | 43 ++++++++- moe/views/schemas.py | 40 +++++++- 6 files changed, 286 insertions(+), 10 deletions(-) create mode 100644 moe/bandit/epsilon_first.py create mode 100644 moe/tests/bandit/epsilon_first_test.py diff --git a/moe/bandit/constant.py b/moe/bandit/constant.py index 1653997e..489e1b98 100644 --- a/moe/bandit/constant.py +++ b/moe/bandit/constant.py @@ -8,8 +8,11 @@ } } +# Default Hyperparameters DEFAULT_EPSILON = 0.05 +DEFAULT_TOTAL_SAMPLES = 100 # Epsilon subtypes +EPSILON_SUBTYPE_FIRST = 'first' EPSILON_SUBTYPE_GREEDY = 'greedy' -EPSILON_SUBTYPES = [EPSILON_SUBTYPE_GREEDY] +EPSILON_SUBTYPES = [EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY] diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py new file mode 100644 index 00000000..e6e16682 --- /dev/null +++ b/moe/bandit/epsilon_first.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +"""Classes (Python) to compute the Bandit Epsilon-First arm allocation and choosing the arm to pull next. + +See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit. + +""" +import numpy + +from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST +from moe.bandit.epsilon import Epsilon + + +class EpsilonFirst(Epsilon): + + r"""Implementation of EpsilonFirst. + + A class to encapsulate the computation of bandit epsilon first. + + total_samples is the total number of samples (#to sample + #sampled) + #sampled is calculated by summing up total from each arm sampled. + total_samples is T from :doc:`bandit`. 
+ + See superclass :class:`moe.bandit.epsilon.Epsilon` for further details. + + """ + + def __init__( + self, + historical_info, + epsilon=DEFAULT_EPSILON, + total_samples=DEFAULT_TOTAL_SAMPLES, + ): + """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details. + + total_samples is the total number of samples (#to sample + #sampled) + #sampled is calculated by summing up total from each arm sampled. + total_samples is T from :doc:`bandit`. + + """ + super(EpsilonFirst, self).__init__( + historical_info=historical_info, + subtype=EPSILON_SUBTYPE_FIRST, + epsilon=epsilon, + ) + self._total_samples = total_samples + + def allocate_arms(self): + r"""Compute the allocation to each arm given ``historical_info``, running bandit ``subtype`` endpoint with hyperparameters in ``hyperparameter_info``. + + Computes the allocation to each arm based on the given subtype, historical info, and hyperparameter info. + + Works with k-armed bandits (k >= 1). + + The Algorithm: http://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions + + This method starts with a pure exploration phase, followed by a pure exploitation phase. + If we have a total of T trials, the first :math:`\epsilon` T trials, we only explore. + After that, we only exploit (t = :math:`\epsilon` T, :math:`\epsilon` T + 1, ..., T). + + In other words, this method will pull a random arm in the exploration phase. + Then this method will pull the optimal arm (best expected return) in the exploitation phase. + + In case of a tie in the exploitation phase, the method will split the probability 1 among the optimal arms. + + For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 ({win:10, loss:10, total:20}) + and a new arm (arm3, average payoff is 0 and total is 0). + + Let the epsilon :math:`\epsilon` be 0.1. + + The allocation depends on which phase we are in: + + Case 1: T = 50 + + Recall that T = #to sample + #sampled. 
#sampled = 20 + 20 + 0 = 40. + So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials + and thus we are in the exploitation phase. We split probability 1 between the optimal arms arm1 and arm2. + + arm1: 0.5, arm2: 0.5, arm3: 0.0. + + Case 2: T = 500 + + We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials. + Since we are on trial #41, we are in the exploration phase. We choose arms randomly: + + arm1: 0.33, arm2: 0.33, arm3: 0.33. + + :return: the dictionary of (arm, allocation) key-value pairs + :rtype: a dictionary of (String(), float64) pairs + """ + arms_sampled = self._historical_info.arms_sampled + num_arms = self._historical_info.num_arms + + if not arms_sampled: + raise ValueError('sample_arms are empty!') + + num_sampled = sum([sampled_arm.total for sampled_arm in arms_sampled.itervalues()]) + # Exploration phase, trials 1,2,..., epsilon * T + # Allocate equal probability to all arms + if num_sampled < self._total_samples * self._epsilon: + equal_allocation = 1.0 / num_arms + arms_to_allocations = {} + for arm_name in arms_sampled.iterkeys(): + arms_to_allocations[arm_name] = equal_allocation + return arms_to_allocations + + # Exploitation phase, trials 1,2,..., epsilon * T+1, ..., T + avg_payoff_arm_name_list = [] + for arm_name, sampled_arm in arms_sampled.iteritems(): + avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 + avg_payoff_arm_name_list.append((avg_payoff, arm_name)) + avg_payoff_arm_name_list.sort(reverse=True) + + best_payoff, _ = avg_payoff_arm_name_list[0] + # Filter out arms that have average payoff less than the best payoff + winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) + # Extract a list of winning arm names from a list of (average payoff, arm name) tuples. 
+ _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list)) + winning_arm_names = frozenset(winning_arm_name_list) + + num_winning_arms = len(winning_arm_names) + arms_to_allocations = {} + + winning_arm_allocation = 1.0 / num_winning_arms + # Split allocation among winning arms, all other arms get allocation of 0. + for arm_name in arms_sampled.iterkeys(): + arms_to_allocations[arm_name] = winning_arm_allocation if arm_name in winning_arm_names else 0.0 + + return arms_to_allocations diff --git a/moe/bandit/linkers.py b/moe/bandit/linkers.py index e5727dc3..a7a5c8bf 100644 --- a/moe/bandit/linkers.py +++ b/moe/bandit/linkers.py @@ -2,7 +2,8 @@ """Links between the implementations of bandit algorithms.""" from collections import namedtuple -from moe.bandit.constant import EPSILON_SUBTYPE_GREEDY +from moe.bandit.constant import EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY +from moe.bandit.epsilon_first import EpsilonFirst from moe.bandit.epsilon_greedy import EpsilonGreedy # Epsilon @@ -16,6 +17,10 @@ EPSILON_SUBTYPES_TO_EPSILON_METHODS = { + EPSILON_SUBTYPE_FIRST: EpsilonMethod( + subtype=EPSILON_SUBTYPE_FIRST, + bandit_class=EpsilonFirst, + ), EPSILON_SUBTYPE_GREEDY: EpsilonMethod( subtype=EPSILON_SUBTYPE_GREEDY, bandit_class=EpsilonGreedy, diff --git a/moe/tests/bandit/epsilon_first_test.py b/moe/tests/bandit/epsilon_first_test.py new file mode 100644 index 00000000..cfe8c799 --- /dev/null +++ b/moe/tests/bandit/epsilon_first_test.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +"""Test epsilon-first bandit implementation. + +Test default values with one, two, and three arms. +Test one arm with various epsilon values. 
+ +""" +import testify as T + +from moe.bandit.epsilon_first import EpsilonFirst +from moe.tests.bandit.epsilon_test_case import EpsilonTestCase + + +class EpsilonFirstTest(EpsilonTestCase): + + """Verify that different epsilon values and historical infos return correct results.""" + + bandit_class = EpsilonFirst + + total_samples_to_test = [1, 10, 100] + + def test_init_default(self): + """Verify that default values do not throw an error. This is purely an integration test.""" + self._test_init_default() + + def test_one_arm(self): + """Check that the one-arm case always returns the given arm as the winning arm and the allocation is 1.0.""" + for epsilon in self.epsilons_to_test: + for total_samples in self.total_samples_to_test: + bandit = self.bandit_class(self.one_arm_test_case, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0}) + T.assert_equal(bandit.choose_arm(), "arm1") + + def test_two_new_arms(self): + """Check that the two-new-arms case always allocate each arm equally (the allocation is 0.5 for both arms).""" + for epsilon in self.epsilons_to_test: + for total_samples in self.total_samples_to_test: + bandit = self.bandit_class(self.two_new_arms_test_case, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5}) + + def test_two_arms_epsilon_zero(self): + """Check that the two-arms case with zero epsilon (always exploit) always allocate arm1:1.0 and arm2:0.0 when average payoffs are arm1:1.0 and arm2:0.0.""" + epsilon = 0.0 + bandit = self.bandit_class(self.two_arms_test_case, epsilon) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0}) + T.assert_equal(bandit.choose_arm(), "arm1") + + def test_two_arms_epsilon_one(self): + """Check that the two-arms case with one epsilon (always explore) always allocate arm1:0.5 and arm2:0.5 when average payoffs are arm1:1.0 and arm2:0.0.""" + epsilon = 1.0 + bandit = self.bandit_class(self.two_arms_test_case, epsilon) + 
T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5}) + + def test_three_arms_explore(self): + """Check that the three-arms cases with integer and float payoffs in exploration phase return the expected arm allocations.""" + epsilon = 0.5 + total_samples = 10 + for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]: + bandit = self.bandit_class(historical_info, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0}) + + def test_three_arms_exploit(self): + """Check that the three-arms cases with integer and float payoffs in exploitation phase return the expected arm allocations.""" + epsilon = 0.7 + total_samples = 10 + equal_allocation = 1.0 / 3 + for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]: + bandit = self.bandit_class(historical_info, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation}) + + +if __name__ == "__main__": + T.run() diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py index a89164c2..005a2f68 100644 --- a/moe/views/rest/bandit_epsilon.py +++ b/moe/views/rest/bandit_epsilon.py @@ -6,6 +6,7 @@ 2. 
pretty and backend views """ import colander +import copy from pyramid.view import view_config @@ -14,7 +15,7 @@ from moe.views.bandit_pretty_view import BanditPrettyView from moe.views.constant import BANDIT_EPSILON_ROUTE_NAME, BANDIT_EPSILON_PRETTY_ROUTE_NAME from moe.views.pretty_view import PRETTY_RENDERER -from moe.views.schemas import ArmAllocations, BanditEpsilonHyperparameterInfo, BanditHistoricalInfo +from moe.views.schemas import ArmAllocations, BanditHistoricalInfo, BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES from moe.views.utils import _make_bandit_historical_info_from_params @@ -75,7 +76,10 @@ class BanditEpsilonRequest(colander.MappingSchema): missing=EPSILON_SUBTYPE_GREEDY, ) historical_info = BanditHistoricalInfo() - hyperparameter_info = BanditEpsilonHyperparameterInfo() + hyperparameter_info = colander.SchemaNode( + colander.Mapping(unknown='preserve'), + missing={}, + ) class BanditEpsilonResponse(colander.MappingSchema): @@ -125,6 +129,38 @@ class BanditEpsilonView(BanditPrettyView): "hyperparameter_info": {"epsilon": DEFAULT_EPSILON}, } + def get_params_from_request(self): + """Return the deserialized parameters from the json_body of a request. + + We explicitly pull out the ``hyperparameter_info`` and use it to deserialize and validate + the other parameters (epsilon, total_samples). + + This is necessary because we have different hyperparameters for + different subtypes. + + :returns: A deserialized self.request_schema object + :rtype: dict + + """ + # First we get the standard params (not including historical info) + params = super(BanditEpsilonView, self).get_params_from_request() + + # colander deserialized results are READ-ONLY. We will potentially be overwriting + # fields of ``params['hyperparameter_info']``, so we need to copy it first. 
+ params['hyperparameter_info'] = copy.deepcopy(params['hyperparameter_info']) + + # Find the schema class that corresponds to the ``subtype`` of the request + # ``hyperparameter_info`` has *not been validated yet*, so we need to validate manually. + schema_class = BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES[params['subtype']]() + + # Deserialize and validate the parameters + validated_hyperparameter_info = schema_class.deserialize(params['hyperparameter_info']) + + # Put the now validated hyperparameter info back into the params dictionary to be consumed by the view + params['hyperparameter_info'] = validated_hyperparameter_info + + return params + @view_config(route_name=_pretty_route_name, renderer=PRETTY_RENDERER) def pretty_view(self): """A pretty, browser interactive view for the interface. Includes form request and response. @@ -153,9 +189,8 @@ def bandit_epsilon_view(self): subtype = params.get('subtype') historical_info = _make_bandit_historical_info_from_params(params) - epsilon = params.get('hyperparameter_info').get('epsilon') - bandit_class = EPSILON_SUBTYPES_TO_EPSILON_METHODS[subtype].bandit_class(historical_info=historical_info, epsilon=epsilon) + bandit_class = EPSILON_SUBTYPES_TO_EPSILON_METHODS[subtype].bandit_class(historical_info=historical_info, **params.get('hyperparameter_info')) return self.form_response({ 'endpoint': self._route_name, diff --git a/moe/views/schemas.py b/moe/views/schemas.py index 8b9a85d3..bb23c9fc 100644 --- a/moe/views/schemas.py +++ b/moe/views/schemas.py @@ -10,7 +10,7 @@ """ import colander -from moe.bandit.constant import DEFAULT_EPSILON +from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY from moe.bandit.data_containers import SampleArm from moe.optimal_learning.python.constant import GRADIENT_DESCENT_OPTIMIZER, TENSOR_PRODUCT_DOMAIN_TYPE, SQUARE_EXPONENTIAL_COVARIANCE_TYPE, NULL_OPTIMIZER, NEWTON_OPTIMIZER, DOMAIN_TYPES, 
OPTIMIZER_TYPES, COVARIANCE_TYPES, CONSTANT_LIAR_METHODS, DEFAULT_MAX_NUM_THREADS, MAX_ALLOWED_NUM_THREADS, DEFAULT_EXPECTED_IMPROVEMENT_MC_ITERATIONS, LIKELIHOOD_TYPES, LOG_MARGINAL_LIKELIHOOD, DEFAULT_CONSTANT_LIAR_METHOD, DEFAULT_CONSTANT_LIAR_LIE_NOISE_VARIANCE, DEFAULT_KRIGING_NOISE_VARIANCE, DEFAULT_KRIGING_STD_DEVIATION_COEF @@ -326,13 +326,14 @@ class CovarianceInfo(StrictMappingSchema): ) -class BanditEpsilonHyperparameterInfo(colander.MappingSchema): +class BanditEpsilonFirstHyperparameterInfo(StrictMappingSchema): - """The hyperparameter info needed for every Bandit Epsilon request. + """The hyperparameter info needed for every Bandit Epsilon-First request. **Required fields** - :epsilon: epsilon value for epsilon-greedy bandit. This strategy pulls the optimal arm (best expected return) with probability 1-epsilon. With probability epsilon a random arm is pulled. + :epsilon: epsilon value for epsilon bandits. This strategy pulls the optimal arm (best expected return) with probability 1-epsilon. With probability epsilon a random arm is pulled. + :total_samples: total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`. """ @@ -342,6 +343,37 @@ class BanditEpsilonHyperparameterInfo(colander.MappingSchema): missing=DEFAULT_EPSILON, ) + total_samples = colander.SchemaNode( + colander.Int(), + validator=colander.Range(min=0), + missing=DEFAULT_TOTAL_SAMPLES, + ) + + +class BanditEpsilonGreedyHyperparameterInfo(StrictMappingSchema): + + """The hyperparameter info needed for every Bandit Epsilon-Greedy request. + + **Required fields** + + :epsilon: epsilon value for epsilon bandits. This strategy pulls the optimal arm (best expected return) with probability 1-epsilon. With probability epsilon a random arm is pulled. 
+ + """ + + epsilon = colander.SchemaNode( + colander.Float(), + validator=colander.Range(min=0), + missing=DEFAULT_EPSILON, + ) + + +#: Mapping from bandit epsilon subtypes (:const:`moe.bandit.constant.EPSILON_SUBTYPES`) to +#: hyperparameter info schemas, e.g., :class:`moe.views.schemas.BanditEpsilonFirstHyperparameterInfo`. +BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES = { + EPSILON_SUBTYPE_FIRST: BanditEpsilonFirstHyperparameterInfo, + EPSILON_SUBTYPE_GREEDY: BanditEpsilonGreedyHyperparameterInfo, + } + class GpHistoricalInfo(StrictMappingSchema): From a40389f6f42fe034c3274610acbf7cfbc417fa79 Mon Sep 17 00:00:00 2001 From: norases Date: Wed, 30 Jul 2014 19:08:17 -0700 Subject: [PATCH 2/6] Added tests for rest view. --- moe/bandit/__init__.py | 1 + moe/bandit/constant.py | 13 ++- moe/bandit/epsilon_first.py | 2 +- moe/tests/bandit/__init__.py | 1 + moe/tests/views/rest/bandit_epsilon_test.py | 91 +++++++++++++-------- moe/views/rest/bandit_epsilon.py | 3 +- 6 files changed, 71 insertions(+), 40 deletions(-) diff --git a/moe/bandit/__init__.py b/moe/bandit/__init__.py index 38c5143e..7cfa05bd 100644 --- a/moe/bandit/__init__.py +++ b/moe/bandit/__init__.py @@ -4,6 +4,7 @@ Contains: * :mod:`moe.bandit.epsilon.Epsilon` + * :mod:`moe.bandit.epsilon_greedy.EpsilonFirst` * :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy` """ diff --git a/moe/bandit/constant.py b/moe/bandit/constant.py index 5b9173e1..7d58454c 100644 --- a/moe/bandit/constant.py +++ b/moe/bandit/constant.py @@ -8,10 +8,6 @@ } } -# Default Hyperparameters -DEFAULT_EPSILON = 0.05 -DEFAULT_TOTAL_SAMPLES = 100 - # Epsilon subtypes EPSILON_SUBTYPE_FIRST = 'first' EPSILON_SUBTYPE_GREEDY = 'greedy' @@ -19,3 +15,12 @@ EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY, ] + +# Default Hyperparameters +DEFAULT_EPSILON = 0.05 +DEFAULT_TOTAL_SAMPLES = 100 +EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS = { + EPSILON_SUBTYPE_FIRST: {'epsilon': DEFAULT_EPSILON, + 'total_samples': DEFAULT_TOTAL_SAMPLES}, 
+ EPSILON_SUBTYPE_GREEDY: {'epsilon': DEFAULT_EPSILON}, + } diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py index e6e16682..0da95f95 100644 --- a/moe/bandit/epsilon_first.py +++ b/moe/bandit/epsilon_first.py @@ -103,7 +103,7 @@ def allocate_arms(self): arms_to_allocations[arm_name] = equal_allocation return arms_to_allocations - # Exploitation phase, trials 1,2,..., epsilon * T+1, ..., T + # Exploitation phase, trials epsilon * T+1, ..., T avg_payoff_arm_name_list = [] for arm_name, sampled_arm in arms_sampled.iteritems(): avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 diff --git a/moe/tests/bandit/__init__.py b/moe/tests/bandit/__init__.py index eccf36b5..987476ad 100644 --- a/moe/tests/bandit/__init__.py +++ b/moe/tests/bandit/__init__.py @@ -21,6 +21,7 @@ **Files in this package** * :mod:`moe.tests.bandit.bandit_test_case`: base test case for bandit tests with a simple integration test case +* :mod:`moe.tests.bandit.epsilon_first_test`: tests for :mod:`moe.bandit.epsilon_first.EpsilonFirst` * :mod:`moe.tests.bandit.epsilon_greedy_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy` * :mod:`moe.tests.bandit.epsilon_test_case`: test cases for classes under :mod:`moe.bandit.epsilon.Epsilon` * :mod:`moe.tests.bandit.linkers_test`: tests for :mod:`moe.bandit.linkers` diff --git a/moe/tests/views/rest/bandit_epsilon_test.py b/moe/tests/views/rest/bandit_epsilon_test.py index 615cb02b..3caf8f56 100644 --- a/moe/tests/views/rest/bandit_epsilon_test.py +++ b/moe/tests/views/rest/bandit_epsilon_test.py @@ -6,7 +6,7 @@ import testify as T -from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY +from moe.bandit.constant import EPSILON_SUBTYPES, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS, EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY from moe.tests.bandit.bandit_test_case import BanditTestCase from moe.tests.views.rest_test_case import RestTestCase 
from moe.views.constant import BANDIT_EPSILON_MOE_ROUTE @@ -17,26 +17,22 @@ class TestBanditEpsilonViews(BanditTestCase, RestTestCase): """Integration test for the /bandit/epsilon endpoint.""" - precompute_gaussian_process_data = True - - def _build_json_payload(self, subtype, historical_info, epsilon): + def _build_json_payload(self, subtype, historical_info, hyperparameter_info): """Create a json_payload to POST to the /bandit/epsilon endpoint with all needed info.""" dict_to_dump = { 'subtype': subtype, 'historical_info': historical_info.json_payload(), - 'hyperparameter_info': { - 'epsilon': epsilon, - }, + 'hyperparameter_info': hyperparameter_info, } return json.dumps(dict_to_dump) - def test_hyperparameters_passed_through(self): - """Test that the hyperparameters get passed through to the endpoint.""" + def test_epsilon_greedy_hyperparameters_passed_through(self): + """Test that the hyperparameters get passed through to the epsilon-greedy endpoint.""" historical_info = self.one_arm_test_case # Test default test parameters get passed through - json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON)) + json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[EPSILON_SUBTYPE_GREEDY])) request = pyramid.testing.DummyRequest(post=json_payload) request.json_body = json_payload @@ -55,38 +51,65 @@ def test_hyperparameters_passed_through(self): T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info']) + def test_epsilon_first_hyperparameters_passed_through(self): + """Test that the hyperparameters get passed through to the epsilon-first endpoint.""" + historical_info = self.one_arm_test_case + + # Test default test parameters get passed through + json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_FIRST, historical_info, 
EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[EPSILON_SUBTYPE_FIRST])) + + request = pyramid.testing.DummyRequest(post=json_payload) + request.json_body = json_payload + view = BanditEpsilonView(request) + params = view.get_params_from_request() + + T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info']) + + # Test an arbitrary epsilon and total_samples get passed through + json_payload['hyperparameter_info']['epsilon'] = 1.0 + json_payload['hyperparameter_info']['total_samples'] = 20000 + + request = pyramid.testing.DummyRequest(post=json_payload) + request.json_body = json_payload + view = BanditEpsilonView(request) + params = view.get_params_from_request() + + T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info']) + def test_historical_info_passed_through(self): """Test that the historical info get passed through to the endpoint.""" - for historical_info in self.historical_infos_to_test: - # Test default test parameters get passed through - json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON)) + for subtype in EPSILON_SUBTYPES: + for historical_info in self.historical_infos_to_test: + # Test default test parameters get passed through + json_payload = json.loads(self._build_json_payload(subtype, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[subtype])) - request = pyramid.testing.DummyRequest(post=json_payload) - request.json_body = json_payload - view = BanditEpsilonView(request) - params = view.get_params_from_request() + request = pyramid.testing.DummyRequest(post=json_payload) + request.json_body = json_payload + view = BanditEpsilonView(request) + params = view.get_params_from_request() - T.assert_dicts_equal(params['historical_info'], json_payload['historical_info']) + T.assert_dicts_equal(params['historical_info'], json_payload['historical_info']) def test_interface_returns_as_expected(self): """Integration test 
for the /bandit/epsilon endpoint.""" moe_route = BANDIT_EPSILON_MOE_ROUTE - for historical_info in self.historical_infos_to_test: - json_payload = self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON) - arm_names = set([arm_name for arm_name in historical_info.arms_sampled.iterkeys()]) - resp = self.testapp.post(moe_route.endpoint, json_payload) - resp_schema = BanditEpsilonResponse() - resp_dict = resp_schema.deserialize(json.loads(resp.body)) - resp_arm_names = set([arm_name for arm_name in resp_dict['arm_allocations'].iterkeys()]) - T.assert_sets_equal(arm_names, resp_arm_names) - # The allocations should be in range [0, 1] - # The sum of all allocations should be 1.0. - total_allocation = 0 - for allocation in resp_dict['arm_allocations'].itervalues(): - T.assert_gte(allocation, 0) - T.assert_lte(allocation, 1) - total_allocation += allocation - T.assert_equal(total_allocation, 1.0) + for subtype in EPSILON_SUBTYPES: + for historical_info in self.historical_infos_to_test: + json_payload = self._build_json_payload(subtype, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[subtype]) + arm_names = set([arm_name for arm_name in historical_info.arms_sampled.iterkeys()]) + resp = self.testapp.post(moe_route.endpoint, json_payload) + resp_schema = BanditEpsilonResponse() + resp_dict = resp_schema.deserialize(json.loads(resp.body)) + resp_arm_names = set([arm_name for arm_name in resp_dict['arm_allocations'].iterkeys()]) + T.assert_sets_equal(arm_names, resp_arm_names) + # The allocations should be in range [0, 1] + # The sum of all allocations should be 1.0. 
+ total_allocation = 0 + for allocation in resp_dict['arm_allocations'].itervalues(): + T.assert_gte(allocation, 0) + T.assert_lte(allocation, 1) + total_allocation += allocation + T.assert_equal(total_allocation, 1.0) if __name__ == "__main__": diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py index fd6a677f..9a197156 100644 --- a/moe/views/rest/bandit_epsilon.py +++ b/moe/views/rest/bandit_epsilon.py @@ -6,6 +6,7 @@ 2. pretty and backend views """ import colander + import copy from pyramid.view import view_config @@ -89,7 +90,7 @@ class BanditEpsilonResponse(colander.MappingSchema): **Output fields** :endpoint: the endpoint that was called - :arms: a dictionary of (arm name, allocaiton) key-value pairs (:class:`moe.views.schemas.ArmAllocations`) + :arms: a dictionary of (arm name, allocation) key-value pairs (:class:`moe.views.schemas.ArmAllocations`) :winner: winning arm name **Example Response** From 2598db812341dd2b88c9c8c923e96f627527d890 Mon Sep 17 00:00:00 2001 From: norases Date: Thu, 31 Jul 2014 19:05:56 -0700 Subject: [PATCH 3/6] Added documentation for epsilon-first. 
--- docs/bandit.rst | 3 +-- moe/bandit/__init__.py | 8 ++------ moe/bandit/epsilon_first.py | 3 ++- moe/tests/bandit/epsilon_first_test.py | 19 +++++++++++++------ moe/views/__init__.py | 4 +++- moe/views/rest/__init__.py | 10 ++++++++++ moe/views/schemas/__init__.py | 1 + 7 files changed, 32 insertions(+), 16 deletions(-) diff --git a/docs/bandit.rst b/docs/bandit.rst index d457b30d..1fc7a3bd 100644 --- a/docs/bandit.rst +++ b/docs/bandit.rst @@ -59,12 +59,12 @@ There are many different policies for this problem: We have implemented the following policies in our package: +* :mod:`~moe.bandit.epsilon_first.EpsilonFirst` * :mod:`~moe.bandit.epsilon_greedy.EpsilonGreedy` Other policies include: * Weighted random choice -* `Epsilon-first`_ * `Epsilon-decreasing`_ \* * `UCB-exp (Upper Confidence Bound)`_ \* * `UCB-tuned`_ \* @@ -73,7 +73,6 @@ Other policies include: \* Regret bounded as :math:`t \rightarrow \infty` -.. _Epsilon-first: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies .. _Epsilon-decreasing: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies .. _UCB-exp (Upper Confidence Bound): http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf .. 
_UCB-tuned: http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf diff --git a/moe/bandit/__init__.py b/moe/bandit/__init__.py index 7f6e2341..00c7d99e 100644 --- a/moe/bandit/__init__.py +++ b/moe/bandit/__init__.py @@ -3,14 +3,11 @@ **Files in this package** -<<<<<<< HEAD - * :mod:`moe.bandit.epsilon.Epsilon` - * :mod:`moe.bandit.epsilon_greedy.EpsilonFirst` - * :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy` -======= * :mod:`moe.bandit.constant`: some default configuration values for ``optimal_learning`` components * :mod:`moe.bandit.data_containers`: :class:`~moe.bandit.data_containers.SampleArm` and :class:`~moe.bandit.data_containers.HistoricalData` containers for passing data to the ``bandit`` library +* :mod:`moe.bandit.epsilon_first`: :class:`~moe.bandit.epsilon_first.EpsilonFirst` + object for allocating bandit arms and choosing the winning arm based on epsilon-first policy. * :mod:`moe.bandit.epsilon_greedy`: :class:`~moe.bandit.epsilon_greedy.EpsilonGreedy` object for allocating bandit arms and choosing the winning arm based on epsilon-greedy policy. * :mod:`moe.bandit.epsilon`: a base :class:`~moe.bandit.epsilon.Epsilon` @@ -26,6 +23,5 @@ A set of abstract base classes (ABCs) defining an interface for interacting with ``bandit``. These consist of composable functions and classes to allocate bandit arms and choose arm. 
->>>>>>> c802816b180e60ae732239d10f3e7f99ffb078cf """ diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py index 0da95f95..1cd65704 100644 --- a/moe/bandit/epsilon_first.py +++ b/moe/bandit/epsilon_first.py @@ -86,6 +86,7 @@ def allocate_arms(self): :return: the dictionary of (arm, allocation) key-value pairs :rtype: a dictionary of (String(), float64) pairs + """ arms_sampled = self._historical_info.arms_sampled num_arms = self._historical_info.num_arms @@ -110,7 +111,7 @@ def allocate_arms(self): avg_payoff_arm_name_list.append((avg_payoff, arm_name)) avg_payoff_arm_name_list.sort(reverse=True) - best_payoff, _ = avg_payoff_arm_name_list[0] + best_payoff, _ = max(avg_payoff_arm_name_list) # Filter out arms that have average payoff less than the best payoff winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) # Extract a list of winning arm names from a list of (average payoff, arm name) tuples. diff --git a/moe/tests/bandit/epsilon_first_test.py b/moe/tests/bandit/epsilon_first_test.py index cfe8c799..9c1265b9 100644 --- a/moe/tests/bandit/epsilon_first_test.py +++ b/moe/tests/bandit/epsilon_first_test.py @@ -32,7 +32,7 @@ def test_one_arm(self): T.assert_equal(bandit.choose_arm(), "arm1") def test_two_new_arms(self): - """Check that the two-new-arms case always allocate each arm equally (the allocation is 0.5 for both arms).""" + """Check that the two-new-arms case always allocate each arm equally (the allocation is 0.5 for both arms). 
This tests num_winning_arms == num_arms > 1.""" for epsilon in self.epsilons_to_test: for total_samples in self.total_samples_to_test: bandit = self.bandit_class(self.two_new_arms_test_case, epsilon, total_samples) @@ -53,20 +53,27 @@ def test_two_arms_epsilon_one(self): def test_three_arms_explore(self): """Check that the three-arms cases with integer and float payoffs in exploration phase return the expected arm allocations.""" - epsilon = 0.5 + epsilon = 0.7 total_samples = 10 + equal_allocation = 1.0 / 3 for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]: bandit = self.bandit_class(historical_info, epsilon, total_samples) - T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0}) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation}) def test_three_arms_exploit(self): """Check that the three-arms cases with integer and float payoffs in exploitation phase return the expected arm allocations.""" - epsilon = 0.7 + epsilon = 0.5 total_samples = 10 - equal_allocation = 1.0 / 3 for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]: bandit = self.bandit_class(historical_info, epsilon, total_samples) - T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation}) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0}) + + def test_three_arms_exploit_two_winners(self): + """Check that the three-arms cases with two winners in exploitation phase return the expected arm allocations. 
This tests num_arms > num_winning_arms > 1.""" + epsilon = 0.5 + total_samples = 10 + bandit = self.bandit_class(self.three_arms_two_winners_test_case, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5, "arm3": 0.0}) if __name__ == "__main__": diff --git a/moe/views/__init__.py b/moe/views/__init__.py index cb01da4f..dd3babda 100644 --- a/moe/views/__init__.py +++ b/moe/views/__init__.py @@ -5,7 +5,9 @@ * :mod:`moe.views.frontend`: the frontend code * :mod:`moe.views.rest`: various REST endpoints for internal gaussian process information - * :mod:`moe.views.gp_pretty_view`: base view for all REST endpoints + * :mod:`moe.views.pretty_view`: base view for all REST endpoints + * :mod:`moe.views.bandit_pretty_view`: base view for all bandit REST endpoints + * :mod:`moe.views.gp_pretty_view`: base view for all GP REST endpoints * :mod:`moe.views.optimizable_gp_pretty_view`: base view for REST endpoints that require optimization * :mod:`moe.views.gp_next_points_pretty_view`: base view for getting the next best points to sample * :mod:`moe.views.schemas`: schemas used to deserialize/serialize inputs/outputs in the REST interface diff --git a/moe/views/rest/__init__.py b/moe/views/rest/__init__.py index d05ed654..ab954c9f 100644 --- a/moe/views/rest/__init__.py +++ b/moe/views/rest/__init__.py @@ -45,4 +45,14 @@ .. http:get:: /gp/next_points/kriging/pretty +**Bandit endpoints:** + + * :mod:`~moe.views.rest.bandit_epsilon` + + .. http:post:: /bandit/epsilon + + Calculates the arm allocations and the best arm to pull next, given subtype, historical data, hyperparameters. + + .. 
http:get:: /bandit/epsilon/pretty + """ diff --git a/moe/views/schemas/__init__.py b/moe/views/schemas/__init__.py index cc8820f7..b217ea6b 100644 --- a/moe/views/schemas/__init__.py +++ b/moe/views/schemas/__init__.py @@ -3,6 +3,7 @@ Contains: + * :mod:`moe.views.schemas.bandit_pretty_view`: common schemas for the ``bandit_*`` endpoints * :mod:`moe.views.schemas.base_schemas`: basic building-block schemas for use in other, more complex schemas * :mod:`moe.views.schemas.gp_next_points_pretty_view`: common schemas for the ``gp_next_points_*`` endpoints * :mod:`moe.views.rest`: schemas for specific REST endpoints From 2d2e7e692ddf9d3529b99fd96f7fe7c74632098e Mon Sep 17 00:00:00 2001 From: norases Date: Fri, 1 Aug 2014 14:35:50 -0700 Subject: [PATCH 4/6] Addressed Scott's and Eric's comments, moved duplicate method _get_winning_arm_names out to superclass Epsilon. --- moe/bandit/epsilon.py | 32 +++++++++++++++++-- moe/bandit/epsilon_first.py | 42 ++++++++++--------------- moe/bandit/epsilon_greedy.py | 18 +++-------- moe/views/schemas/bandit_pretty_view.py | 2 +- 4 files changed, 52 insertions(+), 42 deletions(-) diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py index 09eb47eb..a07034bc 100644 --- a/moe/bandit/epsilon.py +++ b/moe/bandit/epsilon.py @@ -4,16 +4,17 @@ See :class:`moe.bandit.interfaces.bandit_interface` for further details on bandit. """ - import copy +import numpy + from moe.bandit.constant import DEFAULT_EPSILON from moe.bandit.interfaces.bandit_interface import BanditInterface class Epsilon(BanditInterface): - r"""Implementation of the constructor of Epsilon. Abstract method allocate_arms implemented in subclass. + r"""Implementation of the constructor and common methods of Epsilon. Abstract method allocate_arms implemented in subclass. A class to encapsulate the computation of bandit epsilon. Epsilon is the sole hyperparameter in this class. Subclasses may contain other hyperparameters. 
@@ -41,3 +42,30 @@ def __init__( self._historical_info = copy.deepcopy(historical_info) self._subtype = subtype self._epsilon = epsilon + + def _get_winning_arm_names(self, arms_sampled): + r"""Compute the set of winning arm names based on the given ``arms_sampled``.. + + Throws an exception when arms_sampled is empty. + Implementers of this interface will never override this method. + + :return: of set of names of the winning arms + :rtype: frozenset(String()) + + """ + if not arms_sampled: + raise ValueError('sample_arms is empty!') + + avg_payoff_arm_name_list = [] + for arm_name, sampled_arm in arms_sampled.iteritems(): + avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 + avg_payoff_arm_name_list.append((avg_payoff, arm_name)) + avg_payoff_arm_name_list.sort(reverse=True) + + best_payoff, _ = max(avg_payoff_arm_name_list) + # Filter out arms that have average payoff less than the best payoff + winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) + # Extract a list of winning arm names from a list of (average payoff, arm name) tuples. + _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list)) + winning_arm_names = frozenset(winning_arm_name_list) + return winning_arm_names diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py index 1cd65704..6e848d73 100644 --- a/moe/bandit/epsilon_first.py +++ b/moe/bandit/epsilon_first.py @@ -16,8 +16,8 @@ class EpsilonFirst(Epsilon): A class to encapsulate the computation of bandit epsilon first. - total_samples is the total number of samples (#to sample + #sampled) - #sampled is calculated by summing up total from each arm sampled. + total_samples is the total number of samples (number to sample + number sampled) + number sampled is calculated by summing up total from each arm sampled. total_samples is T from :doc:`bandit`. 
See superclass :class:`moe.bandit.epsilon.Epsilon` for further details. @@ -32,8 +32,8 @@ def __init__( ): """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details. - total_samples is the total number of samples (#to sample + #sampled) - #sampled is calculated by summing up total from each arm sampled. + total_samples is the total number of samples (number to sample + number sampled) + number sampled is calculated by summing up total from each arm sampled. total_samples is T from :doc:`bandit`. """ @@ -57,32 +57,33 @@ def allocate_arms(self): If we have a total of T trials, the first :math:`\epsilon` T trials, we only explore. After that, we only exploit (t = :math:`\epsilon` T, :math:`\epsilon` T + 1, ..., T). - In other words, this method will pull a random arm in the exploration phase. + This method will pull a random arm in the exploration phase. Then this method will pull the optimal arm (best expected return) in the exploitation phase. - In case of a tie in the exploitation phase, the method will split the probability 1 among the optimal arms. + In case of a tie in the exploitation phase, the method will split the allocation among the optimal arms. - For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 ({win:10, losee:10, total:20}) + For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 + (``{win:10, lose:10, total:20}``) and a new arm (arm3, average payoff is 0 and total is 0). Let the epsilon :math:`\epsilon` be 0.1. The allocation depends on which phase we are in: - Case 1: T = 50 + *Case 1: T = 50* - Recall that T = #to sample + #sampled. #sampled = 20 + 20 + 0 = 40. + Recall that T = number to sample + number sampled. number sampled :math:`= 20 + 20 + 0 = 40`. So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials - and thus we are in the exploitation phase. 
We split probability 1 between the optimal arms arm1 and arm2. + and thus we are in the exploitation phase. We split the allocation between the optimal arms arm1 and arm2. - arm1: 0.5, arm2: 0.5, arm3: 0.0. + ``{arm1: 0.5, arm2: 0.5, arm3: 0.0}`` - Case 2: T = 500 + *Case 2: T = 500* We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials. Since we are on trail #41, we are in the exploration phase. We choose arms randomly: - arm1: 0.33, arm2: 0.33, arm3: 0.33. + ``{arm1: 0.33, arm2: 0.33, arm3: 0.33}`` :return: the dictionary of (arm, allocation) key-value pairs :rtype: a dictionary of (String(), float64) pairs @@ -92,7 +93,7 @@ def allocate_arms(self): num_arms = self._historical_info.num_arms if not arms_sampled: - raise ValueError('sample_arms are empty!') + raise ValueError('sample_arms is empty!') num_sampled = sum([sampled_arm.total for sampled_arm in arms_sampled.itervalues()]) # Exploration phase, trials 1,2,..., epsilon * T @@ -105,18 +106,7 @@ def allocate_arms(self): return arms_to_allocations # Exploitation phase, trials epsilon * T+1, ..., T - avg_payoff_arm_name_list = [] - for arm_name, sampled_arm in arms_sampled.iteritems(): - avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 - avg_payoff_arm_name_list.append((avg_payoff, arm_name)) - avg_payoff_arm_name_list.sort(reverse=True) - - best_payoff, _ = max(avg_payoff_arm_name_list) - # Filter out arms that have average payoff less than the best payoff - winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) - # Extract a list of winning arm names from a list of (average payoff, arm name) tuples. 
- _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list)) - winning_arm_names = frozenset(winning_arm_name_list) + winning_arm_names = self._get_winning_arm_names(arms_sampled) num_winning_arms = len(winning_arm_names) arms_to_allocations = {} diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py index 2d8843e0..207128f1 100644 --- a/moe/bandit/epsilon_greedy.py +++ b/moe/bandit/epsilon_greedy.py @@ -68,18 +68,10 @@ def allocate_arms(self): num_arms = self._historical_info.num_arms if not arms_sampled: raise ValueError('sample_arms are empty!') - avg_payoff_arm_name_list = [] - for arm_name, sampled_arm in arms_sampled.iteritems(): - avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 - avg_payoff_arm_name_list.append((avg_payoff, arm_name)) - - best_payoff, _ = max(avg_payoff_arm_name_list) - # Filter out arms that have average payoff less than the best payoff - winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) - # Extract a list of winning arm names from a list of (average payoff, arm name) tuples. - _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list)) - - num_winning_arms = len(winning_arm_name_list) + + winning_arm_names = self._get_winning_arm_names(arms_sampled) + + num_winning_arms = len(winning_arm_names) epsilon_allocation = self._epsilon / num_arms arms_to_allocations = {} @@ -89,7 +81,7 @@ def allocate_arms(self): # With probability 1-epsilon, split allocation among winning arms. 
winning_arm_allocation = (1.0 - self._epsilon) / num_winning_arms - for winning_arm_name in winning_arm_name_list: + for winning_arm_name in winning_arm_names: arms_to_allocations[winning_arm_name] += winning_arm_allocation return arms_to_allocations diff --git a/moe/views/schemas/bandit_pretty_view.py b/moe/views/schemas/bandit_pretty_view.py index f01b1773..2de0a214 100644 --- a/moe/views/schemas/bandit_pretty_view.py +++ b/moe/views/schemas/bandit_pretty_view.py @@ -72,7 +72,7 @@ class BanditEpsilonFirstHyperparameterInfo(base_schemas.StrictMappingSchema): **Required fields** :ivar epsilon: (*0.0 <= float64 <= 1.0*) epsilon value for epsilon-first bandit. This strategy pulls the optimal arm - (best expected return) with if it is in exploitation phase (#sampled > epsilon * total_samples). Otherwise a random arm is pulled (exploration). + (best expected return) with if it is in exploitation phase (number sampled > epsilon * total_samples). Otherwise a random arm is pulled (exploration). :ivar total_samples: total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`. """ From 77aff65e2b5054ab2da1bfa6dea9d2664c0b719b Mon Sep 17 00:00:00 2001 From: norases Date: Fri, 1 Aug 2014 16:19:46 -0700 Subject: [PATCH 5/6] Addressed Eric's comments. Wrote test for static function in class Epsilon --- CHANGELOG.md | 3 ++- moe/bandit/epsilon.py | 7 +++++-- moe/bandit/epsilon_first.py | 4 +--- moe/bandit/epsilon_greedy.py | 4 +--- moe/tests/bandit/__init__.py | 1 + moe/tests/bandit/epsilon_test.py | 27 +++++++++++++++++++++++++ moe/views/rest/bandit_epsilon.py | 6 +++--- moe/views/schemas/bandit_pretty_view.py | 6 +++--- 8 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 moe/tests/bandit/epsilon_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 221273ac..7d9d2ce9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ * Features * Added multi-armed bandit endpoint. (#255) - * Implemented epsilon-greedy. 
+ * Implemented epsilon-greedy. (#255) + * Implemented epsilon-first. (#335) * Added support for the L-BFGS-B optimizer. (#296) * Changes diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py index a07034bc..a0dce0ae 100644 --- a/moe/bandit/epsilon.py +++ b/moe/bandit/epsilon.py @@ -32,7 +32,7 @@ def __init__( """Construct an Epsilon object. :param historical_info: a dictionary of arms sampled - :type historical_info: dictionary of (String(), SingleArm()) pairs + :type historical_info: dictionary of (String(), SampleArm()) pairs (see :class:`moe.bandit.data_containers.SampleArm` for more details) :param subtype: subtype of the epsilon bandit algorithm (default: None) :type subtype: String() :param epsilon: epsilon hyperparameter for the epsilon bandit algorithm (default: :const:`~moe.bandit.constant.DEFAULT_EPSILON`) @@ -43,12 +43,15 @@ def __init__( self._subtype = subtype self._epsilon = epsilon - def _get_winning_arm_names(self, arms_sampled): + @staticmethod + def get_winning_arm_names(arms_sampled): r"""Compute the set of winning arm names based on the given ``arms_sampled``.. Throws an exception when arms_sampled is empty. Implementers of this interface will never override this method. + :param arms_sampled: a dictionary of arm name to :class:`moe.bandit.data_containers.SampleArm` + :type arms_sampled: dictionary of (String(), SampleArm()) pairs :return: of set of names of the winning arms :rtype: frozenset(String()) diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py index 6e848d73..a099f877 100644 --- a/moe/bandit/epsilon_first.py +++ b/moe/bandit/epsilon_first.py @@ -4,8 +4,6 @@ See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit. 
""" -import numpy - from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST from moe.bandit.epsilon import Epsilon @@ -106,7 +104,7 @@ def allocate_arms(self): return arms_to_allocations # Exploitation phase, trials epsilon * T+1, ..., T - winning_arm_names = self._get_winning_arm_names(arms_sampled) + winning_arm_names = self.get_winning_arm_names(arms_sampled) num_winning_arms = len(winning_arm_names) arms_to_allocations = {} diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py index 207128f1..ffdad037 100644 --- a/moe/bandit/epsilon_greedy.py +++ b/moe/bandit/epsilon_greedy.py @@ -4,8 +4,6 @@ See :class:`moe.bandit.epsilon.Epsilon` for further details on this bandit. """ -import numpy - from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY from moe.bandit.epsilon import Epsilon @@ -69,7 +67,7 @@ def allocate_arms(self): if not arms_sampled: raise ValueError('sample_arms are empty!') - winning_arm_names = self._get_winning_arm_names(arms_sampled) + winning_arm_names = self.get_winning_arm_names(arms_sampled) num_winning_arms = len(winning_arm_names) epsilon_allocation = self._epsilon / num_arms diff --git a/moe/tests/bandit/__init__.py b/moe/tests/bandit/__init__.py index 987476ad..656ba3d7 100644 --- a/moe/tests/bandit/__init__.py +++ b/moe/tests/bandit/__init__.py @@ -23,6 +23,7 @@ * :mod:`moe.tests.bandit.bandit_test_case`: base test case for bandit tests with a simple integration test case * :mod:`moe.tests.bandit.epsilon_first_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonFirst` * :mod:`moe.tests.bandit.epsilon_greedy_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy` +* :mod:`moe.tests.bandit.epsilon_test`: tests for :mod:`moe.bandit.epsilon.Epsilon` * :mod:`moe.tests.bandit.epsilon_test_case`: test cases for classes under :mod:`moe.bandit.epsilon.Epsilon` * :mod:`moe.tests.bandit.linkers_test`: tests for :mod:`moe.bandit.linkers` diff --git
a/moe/tests/bandit/epsilon_test.py b/moe/tests/bandit/epsilon_test.py new file mode 100644 index 00000000..318dc1ac --- /dev/null +++ b/moe/tests/bandit/epsilon_test.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +"""Test epsilon bandit implementation (functions common to epsilon bandit). + +Test functions in :class:`moe.bandit.epsilon.Epsilon` + +""" +import testify as T + +from moe.bandit.epsilon import Epsilon +from moe.tests.bandit.epsilon_test_case import EpsilonTestCase + + +class EpsilonTest(EpsilonTestCase): + + """Verify that different sample_arms return correct results.""" + + def test_two_new_arms(self): + """Check that the two-new-arms case always returns both arms as winning arms. This tests num_winning_arms == num_arms > 1.""" + T.assert_sets_equal(Epsilon.get_winning_arm_names(self.two_new_arms_test_case.arms_sampled), frozenset(["arm1", "arm2"])) + + def test_three_arms_two_winners(self): + """Check that the three-arms cases with two winners return the expected winning arms. This tests num_arms > num_winning_arms > 1.""" + T.assert_sets_equal(Epsilon.get_winning_arm_names(self.three_arms_two_winners_test_case.arms_sampled), frozenset(["arm1", "arm2"])) + + +if __name__ == "__main__": + T.run() diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py index e605b815..130c8ea1 100644 --- a/moe/views/rest/bandit_epsilon.py +++ b/moe/views/rest/bandit_epsilon.py @@ -53,11 +53,11 @@ def get_params_from_request(self): params = super(BanditEpsilonView, self).get_params_from_request() # colander deserialized results are READ-ONLY. We will potentially be overwriting - # fields of ``params['optimizer_info']``, so we need to copy it first. + # fields of ``params['hyperparameter_info']``, so we need to copy it first. 
params['hyperparameter_info'] = copy.deepcopy(params['hyperparameter_info']) - # Find the schma class that corresponds to the ``optimizer_type`` of the request - # optimizer_parameters has *not been validated yet*, so we need to validate manually. + # Find the schema class that corresponds to the ``subtype`` of the request + # hyperparameter_info has *not been validated yet*, so we need to validate manually. schema_class = BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES[params['subtype']]() # Deserialize and validate the parameters diff --git a/moe/views/schemas/bandit_pretty_view.py b/moe/views/schemas/bandit_pretty_view.py index 2de0a214..d6c337f4 100644 --- a/moe/views/schemas/bandit_pretty_view.py +++ b/moe/views/schemas/bandit_pretty_view.py @@ -73,13 +73,13 @@ class BanditEpsilonFirstHyperparameterInfo(base_schemas.StrictMappingSchema): :ivar epsilon: (*0.0 <= float64 <= 1.0*) epsilon value for epsilon-first bandit. This strategy pulls the optimal arm (best expected return) with if it is in exploitation phase (number sampled > epsilon * total_samples). Otherwise a random arm is pulled (exploration). - :ivar total_samples: total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`. + :ivar total_samples: (*int >= 0*) total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`. 
""" epsilon = colander.SchemaNode( colander.Float(), - validator=colander.Range(min=0), + validator=colander.Range(min=0.0, max=1.0), missing=DEFAULT_EPSILON, ) @@ -103,7 +103,7 @@ class BanditEpsilonGreedyHyperparameterInfo(base_schemas.StrictMappingSchema): epsilon = colander.SchemaNode( colander.Float(), - validator=colander.Range(min=0), + validator=colander.Range(min=0.0, max=1.0), missing=DEFAULT_EPSILON, ) From ec4c69411e2030fa81435aaf5e72ffe40538f6a2 Mon Sep 17 00:00:00 2001 From: norases Date: Fri, 1 Aug 2014 16:36:38 -0700 Subject: [PATCH 6/6] Added ValueError test for Epsilon class and comments about raising ValueError. --- moe/bandit/epsilon.py | 3 ++- moe/bandit/epsilon_first.py | 1 + moe/bandit/epsilon_greedy.py | 1 + moe/tests/bandit/epsilon_test.py | 16 ++++++++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py index a0dce0ae..8de04f6c 100644 --- a/moe/bandit/epsilon.py +++ b/moe/bandit/epsilon.py @@ -54,10 +54,11 @@ def get_winning_arm_names(arms_sampled): :type arms_sampled: dictionary of (String(), SampleArm()) pairs :return: of set of names of the winning arms :rtype: frozenset(String()) + :raise: ValueError when ``arms_sampled`` are empty. """ if not arms_sampled: - raise ValueError('sample_arms is empty!') + raise ValueError('arms_sampled is empty!') avg_payoff_arm_name_list = [] for arm_name, sampled_arm in arms_sampled.iteritems(): diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py index a099f877..d5e48b1c 100644 --- a/moe/bandit/epsilon_first.py +++ b/moe/bandit/epsilon_first.py @@ -85,6 +85,7 @@ def allocate_arms(self): :return: the dictionary of (arm, allocation) key-value pairs :rtype: a dictionary of (String(), float64) pairs + :raise: ValueError when ``sample_arms`` are empty. 
""" arms_sampled = self._historical_info.arms_sampled diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py index ffdad037..d1dfa932 100644 --- a/moe/bandit/epsilon_greedy.py +++ b/moe/bandit/epsilon_greedy.py @@ -60,6 +60,7 @@ def allocate_arms(self): :return: the dictionary of (arm, allocation) key-value pairs :rtype: a dictionary of (String(), float64) pairs + :raise: ValueError when ``sample_arms`` are empty. """ arms_sampled = self._historical_info.arms_sampled diff --git a/moe/tests/bandit/epsilon_test.py b/moe/tests/bandit/epsilon_test.py index 318dc1ac..0ba37d57 100644 --- a/moe/tests/bandit/epsilon_test.py +++ b/moe/tests/bandit/epsilon_test.py @@ -4,6 +4,8 @@ Test functions in :class:`moe.bandit.epsilon.Epsilon` """ +import logging + import testify as T from moe.bandit.epsilon import Epsilon @@ -14,6 +16,20 @@ class EpsilonTest(EpsilonTestCase): """Verify that different sample_arms return correct results.""" + @T.class_setup + def disable_logging(self): + """Disable logging (for the duration of this test case).""" + logging.disable(logging.CRITICAL) + + @T.class_teardown + def enable_logging(self): + """Re-enable logging (so other test cases are unaffected).""" + logging.disable(logging.NOTSET) + + def test_empty_arm_invalid(self): + """Test empty ``sample_arms`` causes a ValueError.""" + T.assert_raises(ValueError, Epsilon.get_winning_arm_names, {}) + def test_two_new_arms(self): """Check that the two-new-arms case always returns both arms as winning arms. This tests num_winning_arms == num_arms > 1.""" T.assert_sets_equal(Epsilon.get_winning_arm_names(self.two_new_arms_test_case.arms_sampled), frozenset(["arm1", "arm2"]))