From a89814659d38ee333c0d4fe24eef6cd56a088cd3 Mon Sep 17 00:00:00 2001
From: norases <norases@gmail.com>
Date: Tue, 29 Jul 2014 15:50:01 -0700
Subject: [PATCH 1/6] Added Epsilon-First code and updated the view.

---
 moe/bandit/constant.py                 |   5 +-
 moe/bandit/epsilon_first.py            | 128 +++++++++++++++++++++++++
 moe/bandit/linkers.py                  |   7 +-
 moe/tests/bandit/epsilon_first_test.py |  73 ++++++++++++++
 moe/views/rest/bandit_epsilon.py       |  43 ++++++++-
 moe/views/schemas.py                   |  40 +++++++-
 6 files changed, 286 insertions(+), 10 deletions(-)
 create mode 100644 moe/bandit/epsilon_first.py
 create mode 100644 moe/tests/bandit/epsilon_first_test.py

diff --git a/moe/bandit/constant.py b/moe/bandit/constant.py
index 1653997e..489e1b98 100644
--- a/moe/bandit/constant.py
+++ b/moe/bandit/constant.py
@@ -8,8 +8,11 @@
                                             }
                            }
 
+# Default Hyperparameters
 DEFAULT_EPSILON = 0.05
+DEFAULT_TOTAL_SAMPLES = 100
 
 # Epsilon subtypes
+EPSILON_SUBTYPE_FIRST = 'first'
 EPSILON_SUBTYPE_GREEDY = 'greedy'
-EPSILON_SUBTYPES = [EPSILON_SUBTYPE_GREEDY]
+EPSILON_SUBTYPES = [EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY]
diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py
new file mode 100644
index 00000000..e6e16682
--- /dev/null
+++ b/moe/bandit/epsilon_first.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+"""Classes (Python) to compute the Bandit Epsilon-First arm allocation and choosing the arm to pull next.
+
+See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit.
+
+"""
+import numpy
+
+from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST
+from moe.bandit.epsilon import Epsilon
+
+
+class EpsilonFirst(Epsilon):
+
+    r"""Implementation of EpsilonFirst.
+
+    A class to encapsulate the computation of bandit epsilon first.
+
+    total_samples is the total number of samples (#to sample + #sampled)
+    #sampled is calculated by summing up total from each arm sampled.
+    total_samples is T from :doc:`bandit`.
+
+    See superclass :class:`moe.bandit.epsilon.Epsilon` for further details.
+
+    """
+
+    def __init__(
+            self,
+            historical_info,
+            epsilon=DEFAULT_EPSILON,
+            total_samples=DEFAULT_TOTAL_SAMPLES,
+    ):
+        """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details.
+
+        total_samples is the total number of samples (#to sample + #sampled)
+        #sampled is calculated by summing up total from each arm sampled.
+        total_samples is T from :doc:`bandit`.
+
+        """
+        super(EpsilonFirst, self).__init__(
+            historical_info=historical_info,
+            subtype=EPSILON_SUBTYPE_FIRST,
+            epsilon=epsilon,
+            )
+        self._total_samples = total_samples
+
+    def allocate_arms(self):
+        r"""Compute the allocation to each arm given ``historical_info``, running bandit ``subtype`` endpoint with hyperparameters in ``hyperparameter_info``.
+
+        Computes the allocation to each arm based on the given subtype, historical info, and hyperparameter info.
+
+        Works with k-armed bandits (k >= 1).
+
+        The Algorithm: http://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions
+
+        This method starts with a pure exploration phase, followed by a pure exploitation phase.
+        If we have a total of T trials, the first :math:`\epsilon` T trials, we only explore.
+        After that, we only exploit (t = :math:`\epsilon` T, :math:`\epsilon` T + 1, ..., T).
+
+        In other words, this method will pull a random arm in the exploration phase.
+        Then this method will pull the optimal arm (best expected return) in the exploitation phase.
+
+        In case of a tie in the exploitation phase, the method will split the probability 1 among the optimal arms.
+
+        For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 ({win:10, loss:10, total:20})
+        and a new arm (arm3, average payoff is 0 and total is 0).
+
+        Let the epsilon :math:`\epsilon` be 0.1.
+
+        The allocation depends on which phase we are in:
+
+        Case 1: T = 50
+
+        Recall that T = #to sample + #sampled. #sampled = 20 + 20 + 0 = 40.
+        So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials
+        and thus we are in the exploitation phase. We split probability 1 between the optimal arms arm1 and arm2.
+
+        arm1: 0.5, arm2: 0.5, arm3: 0.0.
+
+        Case 2: T = 500
+
+        We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials.
+        Since we are on trial #41, we are in the exploration phase. We choose arms randomly:
+
+        arm1: 0.33, arm2: 0.33, arm3: 0.33.
+
+        :return: the dictionary of (arm, allocation) key-value pairs
+        :rtype: a dictionary of (String(), float64) pairs
+        """
+        arms_sampled = self._historical_info.arms_sampled
+        num_arms = self._historical_info.num_arms
+
+        if not arms_sampled:
+            raise ValueError('sample_arms are empty!')
+
+        num_sampled = sum([sampled_arm.total for sampled_arm in arms_sampled.itervalues()])
+        # Exploration phase, trials 1,2,..., epsilon * T
+        # Allocate equal probability to all arms
+        if num_sampled < self._total_samples * self._epsilon:
+            equal_allocation = 1.0 / num_arms
+            arms_to_allocations = {}
+            for arm_name in arms_sampled.iterkeys():
+                arms_to_allocations[arm_name] = equal_allocation
+            return arms_to_allocations
+
+        # Exploitation phase, trials 1,2,..., epsilon * T+1, ..., T
+        avg_payoff_arm_name_list = []
+        for arm_name, sampled_arm in arms_sampled.iteritems():
+            avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
+            avg_payoff_arm_name_list.append((avg_payoff, arm_name))
+        avg_payoff_arm_name_list.sort(reverse=True)
+
+        best_payoff, _ = avg_payoff_arm_name_list[0]
+        # Filter out arms that have average payoff less than the best payoff
+        winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
+        # Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
+        _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list))
+        winning_arm_names = frozenset(winning_arm_name_list)
+
+        num_winning_arms = len(winning_arm_names)
+        arms_to_allocations = {}
+
+        winning_arm_allocation = 1.0 / num_winning_arms
+        # Split allocation among winning arms, all other arms get allocation of 0.
+        for arm_name in arms_sampled.iterkeys():
+            arms_to_allocations[arm_name] = winning_arm_allocation if arm_name in winning_arm_names else 0.0
+
+        return arms_to_allocations
diff --git a/moe/bandit/linkers.py b/moe/bandit/linkers.py
index e5727dc3..a7a5c8bf 100644
--- a/moe/bandit/linkers.py
+++ b/moe/bandit/linkers.py
@@ -2,7 +2,8 @@
 """Links between the implementations of bandit algorithms."""
 from collections import namedtuple
 
-from moe.bandit.constant import EPSILON_SUBTYPE_GREEDY
+from moe.bandit.constant import EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY
+from moe.bandit.epsilon_first import EpsilonFirst
 from moe.bandit.epsilon_greedy import EpsilonGreedy
 
 # Epsilon
@@ -16,6 +17,10 @@
 
 
 EPSILON_SUBTYPES_TO_EPSILON_METHODS = {
+        EPSILON_SUBTYPE_FIRST: EpsilonMethod(
+            subtype=EPSILON_SUBTYPE_FIRST,
+            bandit_class=EpsilonFirst,
+            ),
         EPSILON_SUBTYPE_GREEDY: EpsilonMethod(
             subtype=EPSILON_SUBTYPE_GREEDY,
             bandit_class=EpsilonGreedy,
diff --git a/moe/tests/bandit/epsilon_first_test.py b/moe/tests/bandit/epsilon_first_test.py
new file mode 100644
index 00000000..cfe8c799
--- /dev/null
+++ b/moe/tests/bandit/epsilon_first_test.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+"""Test epsilon-first bandit implementation.
+
+Test default values with one, two, and three arms.
+Test one arm with various epsilon values.
+
+"""
+import testify as T
+
+from moe.bandit.epsilon_first import EpsilonFirst
+from moe.tests.bandit.epsilon_test_case import EpsilonTestCase
+
+
+class EpsilonFirstTest(EpsilonTestCase):
+
+    """Verify that different epsilon values and historical infos return correct results."""
+
+    bandit_class = EpsilonFirst
+
+    total_samples_to_test = [1, 10, 100]
+
+    def test_init_default(self):
+        """Verify that default values do not throw an error. This is purely an integration test."""
+        self._test_init_default()
+
+    def test_one_arm(self):
+        """Check that the one-arm case always returns the given arm as the winning arm and the allocation is 1.0."""
+        for epsilon in self.epsilons_to_test:
+            for total_samples in self.total_samples_to_test:
+                bandit = self.bandit_class(self.one_arm_test_case, epsilon, total_samples)
+                T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0})
+                T.assert_equal(bandit.choose_arm(), "arm1")
+
+    def test_two_new_arms(self):
+        """Check that the two-new-arms case always allocate each arm equally (the allocation is 0.5 for both arms)."""
+        for epsilon in self.epsilons_to_test:
+            for total_samples in self.total_samples_to_test:
+                bandit = self.bandit_class(self.two_new_arms_test_case, epsilon, total_samples)
+                T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5})
+
+    def test_two_arms_epsilon_zero(self):
+        """Check that the two-arms case with zero epsilon (always exploit) always allocate arm1:1.0 and arm2:0.0 when average payoffs are arm1:1.0 and arm2:0.0."""
+        epsilon = 0.0
+        bandit = self.bandit_class(self.two_arms_test_case, epsilon)
+        T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0})
+        T.assert_equal(bandit.choose_arm(), "arm1")
+
+    def test_two_arms_epsilon_one(self):
+        """Check that the two-arms case with one epsilon (always explore) always allocate arm1:0.5 and arm2:0.5 when average payoffs are arm1:1.0 and arm2:0.0."""
+        epsilon = 1.0
+        bandit = self.bandit_class(self.two_arms_test_case, epsilon)
+        T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5})
+
+    def test_three_arms_explore(self):
+        """Check that the three-arms cases with integer and float payoffs in exploration phase return the expected arm allocations."""
+        epsilon = 0.5
+        total_samples = 10
+        for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]:
+            bandit = self.bandit_class(historical_info, epsilon, total_samples)
+            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0})
+
+    def test_three_arms_exploit(self):
+        """Check that the three-arms cases with integer and float payoffs in exploitation phase return the expected arm allocations."""
+        epsilon = 0.7
+        total_samples = 10
+        equal_allocation = 1.0 / 3
+        for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]:
+            bandit = self.bandit_class(historical_info, epsilon, total_samples)
+            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation})
+
+
+if __name__ == "__main__":
+    T.run()
diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py
index a89164c2..005a2f68 100644
--- a/moe/views/rest/bandit_epsilon.py
+++ b/moe/views/rest/bandit_epsilon.py
@@ -6,6 +6,7 @@
     2. pretty and backend views
 """
 import colander
+import copy
 
 from pyramid.view import view_config
 
@@ -14,7 +15,7 @@
 from moe.views.bandit_pretty_view import BanditPrettyView
 from moe.views.constant import BANDIT_EPSILON_ROUTE_NAME, BANDIT_EPSILON_PRETTY_ROUTE_NAME
 from moe.views.pretty_view import PRETTY_RENDERER
-from moe.views.schemas import ArmAllocations, BanditEpsilonHyperparameterInfo, BanditHistoricalInfo
+from moe.views.schemas import ArmAllocations, BanditHistoricalInfo, BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES
 from moe.views.utils import _make_bandit_historical_info_from_params
 
 
@@ -75,7 +76,10 @@ class BanditEpsilonRequest(colander.MappingSchema):
             missing=EPSILON_SUBTYPE_GREEDY,
             )
     historical_info = BanditHistoricalInfo()
-    hyperparameter_info = BanditEpsilonHyperparameterInfo()
+    hyperparameter_info = colander.SchemaNode(
+            colander.Mapping(unknown='preserve'),
+            missing={},
+            )
 
 
 class BanditEpsilonResponse(colander.MappingSchema):
@@ -125,6 +129,38 @@ class BanditEpsilonView(BanditPrettyView):
             "hyperparameter_info": {"epsilon": DEFAULT_EPSILON},
             }
 
+    def get_params_from_request(self):
+        """Return the deserialized parameters from the json_body of a request.
+
+        We explicitly pull out the ``hyperparameter_info`` and use it to deserialize and validate
+        the other parameters (epsilon, total_samples).
+
+        This is necessary because we have different hyperparameters for
+        different subtypes.
+
+        :returns: A deserialized self.request_schema object
+        :rtype: dict
+
+        """
+        # First we get the standard params (not including historical info)
+        params = super(BanditEpsilonView, self).get_params_from_request()
+
+        # colander deserialized results are READ-ONLY. We will potentially be overwriting
+        # fields of ``params['hyperparameter_info']``, so we need to copy it first.
+        params['hyperparameter_info'] = copy.deepcopy(params['hyperparameter_info'])
+
+        # Find the schema class that corresponds to the ``subtype`` of the request
+        # hyperparameter_info has *not been validated yet*, so we need to validate manually.
+        schema_class = BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES[params['subtype']]()
+
+        # Deserialize and validate the parameters
+        validated_hyperparameter_info = schema_class.deserialize(params['hyperparameter_info'])
+
+        # Put the now validated hyperparameter info back into the params dictionary to be consumed by the view
+        params['hyperparameter_info'] = validated_hyperparameter_info
+
+        return params
+
     @view_config(route_name=_pretty_route_name, renderer=PRETTY_RENDERER)
     def pretty_view(self):
         """A pretty, browser interactive view for the interface. Includes form request and response.
@@ -153,9 +189,8 @@ def bandit_epsilon_view(self):
 
         subtype = params.get('subtype')
         historical_info = _make_bandit_historical_info_from_params(params)
-        epsilon = params.get('hyperparameter_info').get('epsilon')
 
-        bandit_class = EPSILON_SUBTYPES_TO_EPSILON_METHODS[subtype].bandit_class(historical_info=historical_info, epsilon=epsilon)
+        bandit_class = EPSILON_SUBTYPES_TO_EPSILON_METHODS[subtype].bandit_class(historical_info=historical_info, **params.get('hyperparameter_info'))
 
         return self.form_response({
                 'endpoint': self._route_name,
diff --git a/moe/views/schemas.py b/moe/views/schemas.py
index 8b9a85d3..bb23c9fc 100644
--- a/moe/views/schemas.py
+++ b/moe/views/schemas.py
@@ -10,7 +10,7 @@
 """
 import colander
 
-from moe.bandit.constant import DEFAULT_EPSILON
+from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY
 from moe.bandit.data_containers import SampleArm
 from moe.optimal_learning.python.constant import GRADIENT_DESCENT_OPTIMIZER, TENSOR_PRODUCT_DOMAIN_TYPE, SQUARE_EXPONENTIAL_COVARIANCE_TYPE, NULL_OPTIMIZER, NEWTON_OPTIMIZER, DOMAIN_TYPES, OPTIMIZER_TYPES, COVARIANCE_TYPES, CONSTANT_LIAR_METHODS, DEFAULT_MAX_NUM_THREADS, MAX_ALLOWED_NUM_THREADS, DEFAULT_EXPECTED_IMPROVEMENT_MC_ITERATIONS, LIKELIHOOD_TYPES, LOG_MARGINAL_LIKELIHOOD, DEFAULT_CONSTANT_LIAR_METHOD, DEFAULT_CONSTANT_LIAR_LIE_NOISE_VARIANCE, DEFAULT_KRIGING_NOISE_VARIANCE, DEFAULT_KRIGING_STD_DEVIATION_COEF
 
@@ -326,13 +326,14 @@ class CovarianceInfo(StrictMappingSchema):
             )
 
 
-class BanditEpsilonHyperparameterInfo(colander.MappingSchema):
+class BanditEpsilonFirstHyperparameterInfo(StrictMappingSchema):
 
-    """The hyperparameter info needed for every  Bandit Epsilon request.
+    """The hyperparameter info needed for every  Bandit Epsilon-First request.
 
     **Required fields**
 
-        :epsilon: epsilon value for epsilon-greedy bandit. This strategy pulls the optimal arm (best expected return) with probability 1-epsilon. With probability epsilon a random arm is pulled.
+        :epsilon: epsilon value for epsilon bandits. This strategy pulls the optimal arm (best expected return) with probability 1-epsilon. With probability epsilon a random arm is pulled.
+        :total_samples: total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`.
 
     """
 
@@ -342,6 +343,37 @@ class BanditEpsilonHyperparameterInfo(colander.MappingSchema):
             missing=DEFAULT_EPSILON,
             )
 
+    total_samples = colander.SchemaNode(
+            colander.Int(),
+            validator=colander.Range(min=0),
+            missing=DEFAULT_TOTAL_SAMPLES,
+            )
+
+
+class BanditEpsilonGreedyHyperparameterInfo(StrictMappingSchema):
+
+    """The hyperparameter info needed for every  Bandit Epsilon-Greedy request.
+
+    **Required fields**
+
+        :epsilon: epsilon value for epsilon bandits. This strategy pulls the optimal arm (best expected return) with probability 1-epsilon. With probability epsilon a random arm is pulled.
+
+    """
+
+    epsilon = colander.SchemaNode(
+            colander.Float(),
+            validator=colander.Range(min=0),
+            missing=DEFAULT_EPSILON,
+            )
+
+
+#: Mapping from bandit epsilon subtypes (:const:`moe.bandit.constant.EPSILON_SUBTYPES`) to
+#: hyperparameter info schemas, e.g., :class:`moe.views.schemas.BanditEpsilonFirstHyperparameterInfo`.
+BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES = {
+        EPSILON_SUBTYPE_FIRST: BanditEpsilonFirstHyperparameterInfo,
+        EPSILON_SUBTYPE_GREEDY: BanditEpsilonGreedyHyperparameterInfo,
+        }
+
 
 class GpHistoricalInfo(StrictMappingSchema):
 

From a40389f6f42fe034c3274610acbf7cfbc417fa79 Mon Sep 17 00:00:00 2001
From: norases <norases@gmail.com>
Date: Wed, 30 Jul 2014 19:08:17 -0700
Subject: [PATCH 2/6] Added tests for rest view.

---
 moe/bandit/__init__.py                      |  1 +
 moe/bandit/constant.py                      | 13 ++-
 moe/bandit/epsilon_first.py                 |  2 +-
 moe/tests/bandit/__init__.py                |  1 +
 moe/tests/views/rest/bandit_epsilon_test.py | 91 +++++++++++++--------
 moe/views/rest/bandit_epsilon.py            |  3 +-
 6 files changed, 71 insertions(+), 40 deletions(-)

diff --git a/moe/bandit/__init__.py b/moe/bandit/__init__.py
index 38c5143e..7cfa05bd 100644
--- a/moe/bandit/__init__.py
+++ b/moe/bandit/__init__.py
@@ -4,6 +4,7 @@
 Contains:
 
     * :mod:`moe.bandit.epsilon.Epsilon`
+    * :mod:`moe.bandit.epsilon_greedy.EpsilonFirst`
     * :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy`
 
 """
diff --git a/moe/bandit/constant.py b/moe/bandit/constant.py
index 5b9173e1..7d58454c 100644
--- a/moe/bandit/constant.py
+++ b/moe/bandit/constant.py
@@ -8,10 +8,6 @@
                                             }
                            }
 
-# Default Hyperparameters
-DEFAULT_EPSILON = 0.05
-DEFAULT_TOTAL_SAMPLES = 100
-
 # Epsilon subtypes
 EPSILON_SUBTYPE_FIRST = 'first'
 EPSILON_SUBTYPE_GREEDY = 'greedy'
@@ -19,3 +15,12 @@
                 EPSILON_SUBTYPE_FIRST,
                 EPSILON_SUBTYPE_GREEDY,
                 ]
+
+# Default Hyperparameters
+DEFAULT_EPSILON = 0.05
+DEFAULT_TOTAL_SAMPLES = 100
+EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS = {
+        EPSILON_SUBTYPE_FIRST: {'epsilon': DEFAULT_EPSILON,
+                                'total_samples': DEFAULT_TOTAL_SAMPLES},
+        EPSILON_SUBTYPE_GREEDY: {'epsilon': DEFAULT_EPSILON},
+        }
diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py
index e6e16682..0da95f95 100644
--- a/moe/bandit/epsilon_first.py
+++ b/moe/bandit/epsilon_first.py
@@ -103,7 +103,7 @@ def allocate_arms(self):
                 arms_to_allocations[arm_name] = equal_allocation
             return arms_to_allocations
 
-        # Exploitation phase, trials 1,2,..., epsilon * T+1, ..., T
+        # Exploitation phase, trials epsilon * T+1, ..., T
         avg_payoff_arm_name_list = []
         for arm_name, sampled_arm in arms_sampled.iteritems():
             avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
diff --git a/moe/tests/bandit/__init__.py b/moe/tests/bandit/__init__.py
index eccf36b5..987476ad 100644
--- a/moe/tests/bandit/__init__.py
+++ b/moe/tests/bandit/__init__.py
@@ -21,6 +21,7 @@
 **Files in this package**
 
 * :mod:`moe.tests.bandit.bandit_test_case`: base test case for bandit tests with a simple integration test case
+* :mod:`moe.tests.bandit.epsilon_first_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonFirst`
 * :mod:`moe.tests.bandit.epsilon_greedy_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy`
 * :mod:`moe.tests.bandit.epsilon_test_case`: test cases for classes under :mod:`moe.bandit.epsilon.Epsilon`
 * :mod:`moe.tests.bandit.linkers_test`: tests for :mod:`moe.bandit.linkers`
diff --git a/moe/tests/views/rest/bandit_epsilon_test.py b/moe/tests/views/rest/bandit_epsilon_test.py
index 615cb02b..3caf8f56 100644
--- a/moe/tests/views/rest/bandit_epsilon_test.py
+++ b/moe/tests/views/rest/bandit_epsilon_test.py
@@ -6,7 +6,7 @@
 
 import testify as T
 
-from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY
+from moe.bandit.constant import EPSILON_SUBTYPES, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS, EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY
 from moe.tests.bandit.bandit_test_case import BanditTestCase
 from moe.tests.views.rest_test_case import RestTestCase
 from moe.views.constant import BANDIT_EPSILON_MOE_ROUTE
@@ -17,26 +17,22 @@ class TestBanditEpsilonViews(BanditTestCase, RestTestCase):
 
     """Integration test for the /bandit/epsilon endpoint."""
 
-    precompute_gaussian_process_data = True
-
-    def _build_json_payload(self, subtype, historical_info, epsilon):
+    def _build_json_payload(self, subtype, historical_info, hyperparameter_info):
         """Create a json_payload to POST to the /bandit/epsilon endpoint with all needed info."""
         dict_to_dump = {
             'subtype': subtype,
             'historical_info': historical_info.json_payload(),
-            'hyperparameter_info': {
-                'epsilon': epsilon,
-                },
+            'hyperparameter_info': hyperparameter_info,
             }
 
         return json.dumps(dict_to_dump)
 
-    def test_hyperparameters_passed_through(self):
-        """Test that the hyperparameters get passed through to the endpoint."""
+    def test_epsilon_greedy_hyperparameters_passed_through(self):
+        """Test that the hyperparameters get passed through to the epsilon-greedy endpoint."""
         historical_info = self.one_arm_test_case
 
         # Test default test parameters get passed through
-        json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON))
+        json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[EPSILON_SUBTYPE_GREEDY]))
 
         request = pyramid.testing.DummyRequest(post=json_payload)
         request.json_body = json_payload
@@ -55,38 +51,65 @@ def test_hyperparameters_passed_through(self):
 
         T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info'])
 
+    def test_epsilon_first_hyperparameters_passed_through(self):
+        """Test that the hyperparameters get passed through to the epsilon-first endpoint."""
+        historical_info = self.one_arm_test_case
+
+        # Test default test parameters get passed through
+        json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_FIRST, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[EPSILON_SUBTYPE_FIRST]))
+
+        request = pyramid.testing.DummyRequest(post=json_payload)
+        request.json_body = json_payload
+        view = BanditEpsilonView(request)
+        params = view.get_params_from_request()
+
+        T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info'])
+
+        # Test an arbitrary epsilon and total_samples get passed through
+        json_payload['hyperparameter_info']['epsilon'] = 1.0
+        json_payload['hyperparameter_info']['total_samples'] = 20000
+
+        request = pyramid.testing.DummyRequest(post=json_payload)
+        request.json_body = json_payload
+        view = BanditEpsilonView(request)
+        params = view.get_params_from_request()
+
+        T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info'])
+
     def test_historical_info_passed_through(self):
         """Test that the historical info get passed through to the endpoint."""
-        for historical_info in self.historical_infos_to_test:
-            # Test default test parameters get passed through
-            json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON))
+        for subtype in EPSILON_SUBTYPES:
+            for historical_info in self.historical_infos_to_test:
+                # Test default test parameters get passed through
+                json_payload = json.loads(self._build_json_payload(subtype, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[subtype]))
 
-            request = pyramid.testing.DummyRequest(post=json_payload)
-            request.json_body = json_payload
-            view = BanditEpsilonView(request)
-            params = view.get_params_from_request()
+                request = pyramid.testing.DummyRequest(post=json_payload)
+                request.json_body = json_payload
+                view = BanditEpsilonView(request)
+                params = view.get_params_from_request()
 
-            T.assert_dicts_equal(params['historical_info'], json_payload['historical_info'])
+                T.assert_dicts_equal(params['historical_info'], json_payload['historical_info'])
 
     def test_interface_returns_as_expected(self):
         """Integration test for the /bandit/epsilon endpoint."""
         moe_route = BANDIT_EPSILON_MOE_ROUTE
-        for historical_info in self.historical_infos_to_test:
-            json_payload = self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON)
-            arm_names = set([arm_name for arm_name in historical_info.arms_sampled.iterkeys()])
-            resp = self.testapp.post(moe_route.endpoint, json_payload)
-            resp_schema = BanditEpsilonResponse()
-            resp_dict = resp_schema.deserialize(json.loads(resp.body))
-            resp_arm_names = set([arm_name for arm_name in resp_dict['arm_allocations'].iterkeys()])
-            T.assert_sets_equal(arm_names, resp_arm_names)
-            # The allocations should be in range [0, 1]
-            # The sum of all allocations should be 1.0.
-            total_allocation = 0
-            for allocation in resp_dict['arm_allocations'].itervalues():
-                T.assert_gte(allocation, 0)
-                T.assert_lte(allocation, 1)
-                total_allocation += allocation
-            T.assert_equal(total_allocation, 1.0)
+        for subtype in EPSILON_SUBTYPES:
+            for historical_info in self.historical_infos_to_test:
+                json_payload = self._build_json_payload(subtype, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[subtype])
+                arm_names = set([arm_name for arm_name in historical_info.arms_sampled.iterkeys()])
+                resp = self.testapp.post(moe_route.endpoint, json_payload)
+                resp_schema = BanditEpsilonResponse()
+                resp_dict = resp_schema.deserialize(json.loads(resp.body))
+                resp_arm_names = set([arm_name for arm_name in resp_dict['arm_allocations'].iterkeys()])
+                T.assert_sets_equal(arm_names, resp_arm_names)
+                # The allocations should be in range [0, 1]
+                # The sum of all allocations should be 1.0.
+                total_allocation = 0
+                for allocation in resp_dict['arm_allocations'].itervalues():
+                    T.assert_gte(allocation, 0)
+                    T.assert_lte(allocation, 1)
+                    total_allocation += allocation
+                T.assert_equal(total_allocation, 1.0)
 
 
 if __name__ == "__main__":
diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py
index fd6a677f..9a197156 100644
--- a/moe/views/rest/bandit_epsilon.py
+++ b/moe/views/rest/bandit_epsilon.py
@@ -6,6 +6,7 @@
     2. pretty and backend views
 """
 import colander
+
 import copy
 
 from pyramid.view import view_config
@@ -89,7 +90,7 @@ class BanditEpsilonResponse(colander.MappingSchema):
     **Output fields**
 
         :endpoint: the endpoint that was called
-        :arms: a dictionary of (arm name, allocaiton) key-value pairs (:class:`moe.views.schemas.ArmAllocations`)
+        :arms: a dictionary of (arm name, allocation) key-value pairs (:class:`moe.views.schemas.ArmAllocations`)
         :winner: winning arm name
 
     **Example Response**

From 2598db812341dd2b88c9c8c923e96f627527d890 Mon Sep 17 00:00:00 2001
From: norases <norases@gmail.com>
Date: Thu, 31 Jul 2014 19:05:56 -0700
Subject: [PATCH 3/6] Added documentation for epsilon-first.

---
 docs/bandit.rst                        |  3 +--
 moe/bandit/__init__.py                 |  8 ++------
 moe/bandit/epsilon_first.py            |  3 ++-
 moe/tests/bandit/epsilon_first_test.py | 19 +++++++++++++------
 moe/views/__init__.py                  |  4 +++-
 moe/views/rest/__init__.py             | 10 ++++++++++
 moe/views/schemas/__init__.py          |  1 +
 7 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/docs/bandit.rst b/docs/bandit.rst
index d457b30d..1fc7a3bd 100644
--- a/docs/bandit.rst
+++ b/docs/bandit.rst
@@ -59,12 +59,12 @@ There are many different policies for this problem:
 
 We have implemented the following policies in our package:
 
+* :mod:`~moe.bandit.epsilon_first.EpsilonFirst`
 * :mod:`~moe.bandit.epsilon_greedy.EpsilonGreedy`
 
 Other policies include:
 
 * Weighted random choice
-* `Epsilon-first`_
 * `Epsilon-decreasing`_ \*
 * `UCB-exp (Upper Confidence Bound)`_ \*
 * `UCB-tuned`_ \*
@@ -73,7 +73,6 @@ Other policies include:
 
 \* Regret bounded as :math:`t \rightarrow \infty`
 
-.. _Epsilon-first: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies
 .. _Epsilon-decreasing: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies
 .. _UCB-exp (Upper Confidence Bound): http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf
 .. _UCB-tuned: http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf
diff --git a/moe/bandit/__init__.py b/moe/bandit/__init__.py
index 7f6e2341..00c7d99e 100644
--- a/moe/bandit/__init__.py
+++ b/moe/bandit/__init__.py
@@ -3,14 +3,11 @@
 
 **Files in this package**
 
-<<<<<<< HEAD
-    * :mod:`moe.bandit.epsilon.Epsilon`
-    * :mod:`moe.bandit.epsilon_greedy.EpsilonFirst`
-    * :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy`
-=======
 * :mod:`moe.bandit.constant`: some default configuration values for ``optimal_learning`` components
 * :mod:`moe.bandit.data_containers`: :class:`~moe.bandit.data_containers.SampleArm`
   and :class:`~moe.bandit.data_containers.HistoricalData` containers for passing data to the ``bandit`` library
+* :mod:`moe.bandit.epsilon_first`: :class:`~moe.bandit.epsilon_first.EpsilonFirst`
+  object for allocating bandit arms and choosing the winning arm based on epsilon-first policy.
 * :mod:`moe.bandit.epsilon_greedy`: :class:`~moe.bandit.epsilon_greedy.EpsilonGreedy`
   object for allocating bandit arms and choosing the winning arm based on epsilon-greedy policy.
 * :mod:`moe.bandit.epsilon`: a base :class:`~moe.bandit.epsilon.Epsilon`
@@ -26,6 +23,5 @@
 
 A set of abstract base classes (ABCs) defining an interface for interacting with ``bandit``. These consist of composable
 functions and classes to allocate bandit arms and choose arm.
->>>>>>> c802816b180e60ae732239d10f3e7f99ffb078cf
 
 """
diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py
index 0da95f95..1cd65704 100644
--- a/moe/bandit/epsilon_first.py
+++ b/moe/bandit/epsilon_first.py
@@ -86,6 +86,7 @@ def allocate_arms(self):
 
         :return: the dictionary of (arm, allocation) key-value pairs
         :rtype: a dictionary of (String(), float64) pairs
+
         """
         arms_sampled = self._historical_info.arms_sampled
         num_arms = self._historical_info.num_arms
@@ -110,7 +111,7 @@ def allocate_arms(self):
             avg_payoff_arm_name_list.append((avg_payoff, arm_name))
         avg_payoff_arm_name_list.sort(reverse=True)
 
-        best_payoff, _ = avg_payoff_arm_name_list[0]
+        best_payoff, _ = max(avg_payoff_arm_name_list)
         # Filter out arms that have average payoff less than the best payoff
         winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
         # Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
diff --git a/moe/tests/bandit/epsilon_first_test.py b/moe/tests/bandit/epsilon_first_test.py
index cfe8c799..9c1265b9 100644
--- a/moe/tests/bandit/epsilon_first_test.py
+++ b/moe/tests/bandit/epsilon_first_test.py
@@ -32,7 +32,7 @@ def test_one_arm(self):
                 T.assert_equal(bandit.choose_arm(), "arm1")
 
     def test_two_new_arms(self):
-        """Check that the two-new-arms case always allocate each arm equally (the allocation is 0.5 for both arms)."""
+        """Check that the two-new-arms case always allocates each arm equally (the allocation is 0.5 for both arms). This tests num_winning_arms == num_arms > 1."""
         for epsilon in self.epsilons_to_test:
             for total_samples in self.total_samples_to_test:
                 bandit = self.bandit_class(self.two_new_arms_test_case, epsilon, total_samples)
@@ -53,20 +53,27 @@ def test_two_arms_epsilon_one(self):
 
     def test_three_arms_explore(self):
         """Check that the three-arms cases with integer and float payoffs in exploration phase return the expected arm allocations."""
-        epsilon = 0.5
+        epsilon = 0.7
         total_samples = 10
+        equal_allocation = 1.0 / 3
         for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]:
             bandit = self.bandit_class(historical_info, epsilon, total_samples)
-            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0})
+            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation})
 
     def test_three_arms_exploit(self):
         """Check that the three-arms cases with integer and float payoffs in exploitation phase return the expected arm allocations."""
-        epsilon = 0.7
+        epsilon = 0.5
         total_samples = 10
-        equal_allocation = 1.0 / 3
         for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]:
             bandit = self.bandit_class(historical_info, epsilon, total_samples)
-            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation})
+            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0})
+
+    def test_three_arms_exploit_two_winners(self):
+        """Check that the three-arms cases with two winners in exploitation phase return the expected arm allocations. This tests num_arms > num_winning_arms > 1."""
+        epsilon = 0.5
+        total_samples = 10
+        bandit = self.bandit_class(self.three_arms_two_winners_test_case, epsilon, total_samples)
+        T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5, "arm3": 0.0})
 
 
 if __name__ == "__main__":
diff --git a/moe/views/__init__.py b/moe/views/__init__.py
index cb01da4f..dd3babda 100644
--- a/moe/views/__init__.py
+++ b/moe/views/__init__.py
@@ -5,7 +5,9 @@
 
     * :mod:`moe.views.frontend`: the frontend code
     * :mod:`moe.views.rest`: various REST endpoints for internal gaussian process information
-    * :mod:`moe.views.gp_pretty_view`: base view for all REST endpoints
+    * :mod:`moe.views.pretty_view`: base view for all REST endpoints
+    * :mod:`moe.views.bandit_pretty_view`: base view for all bandit REST endpoints
+    * :mod:`moe.views.gp_pretty_view`: base view for all GP REST endpoints
     * :mod:`moe.views.optimizable_gp_pretty_view`: base view for REST endpoints that require optimization
     * :mod:`moe.views.gp_next_points_pretty_view`: base view for getting the next best points to sample
     * :mod:`moe.views.schemas`: schemas used to deserialize/serialize inputs/outputs in the REST interface
diff --git a/moe/views/rest/__init__.py b/moe/views/rest/__init__.py
index d05ed654..ab954c9f 100644
--- a/moe/views/rest/__init__.py
+++ b/moe/views/rest/__init__.py
@@ -45,4 +45,14 @@
 
         .. http:get:: /gp/next_points/kriging/pretty
 
+**Bandit endpoints:**
+
+    * :mod:`~moe.views.rest.bandit_epsilon`
+
+        .. http:post:: /bandit/epsilon
+
+           Calculates the arm allocations and the best arm to pull next, given subtype, historical data, hyperparameters.
+
+        .. http:get:: /bandit/epsilon/pretty
+
 """
diff --git a/moe/views/schemas/__init__.py b/moe/views/schemas/__init__.py
index cc8820f7..b217ea6b 100644
--- a/moe/views/schemas/__init__.py
+++ b/moe/views/schemas/__init__.py
@@ -3,6 +3,7 @@
 
 Contains:
 
+    * :mod:`moe.views.schemas.bandit_pretty_view`: common schemas for the ``bandit_*`` endpoints
     * :mod:`moe.views.schemas.base_schemas`: basic building-block schemas for use in other, more complex schemas
     * :mod:`moe.views.schemas.gp_next_points_pretty_view`: common schemas for the ``gp_next_points_*`` endpoints
     * :mod:`moe.views.rest`: schemas for specific REST endpoints

From 2d2e7e692ddf9d3529b99fd96f7fe7c74632098e Mon Sep 17 00:00:00 2001
From: norases <norases@gmail.com>
Date: Fri, 1 Aug 2014 14:35:50 -0700
Subject: [PATCH 4/6] Addressed Scott's and Eric's comments, moved duplicate
 method _get_winning_arm_names out to superclass Epsilon.

---
 moe/bandit/epsilon.py                   | 32 +++++++++++++++++--
 moe/bandit/epsilon_first.py             | 42 ++++++++++---------------
 moe/bandit/epsilon_greedy.py            | 18 +++--------
 moe/views/schemas/bandit_pretty_view.py |  2 +-
 4 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py
index 09eb47eb..a07034bc 100644
--- a/moe/bandit/epsilon.py
+++ b/moe/bandit/epsilon.py
@@ -4,16 +4,17 @@
 See :class:`moe.bandit.interfaces.bandit_interface` for further details on bandit.
 
 """
-
 import copy
 
+import numpy
+
 from moe.bandit.constant import DEFAULT_EPSILON
 from moe.bandit.interfaces.bandit_interface import BanditInterface
 
 
 class Epsilon(BanditInterface):
 
-    r"""Implementation of the constructor of Epsilon. Abstract method allocate_arms implemented in subclass.
+    r"""Implementation of the constructor and common methods of Epsilon. Abstract method allocate_arms implemented in subclass.
 
     A class to encapsulate the computation of bandit epsilon.
     Epsilon is the sole hyperparameter in this class. Subclasses may contain other hyperparameters.
@@ -41,3 +42,30 @@ def __init__(
         self._historical_info = copy.deepcopy(historical_info)
         self._subtype = subtype
         self._epsilon = epsilon
+
+    def _get_winning_arm_names(self, arms_sampled):
+        r"""Compute the set of winning arm names based on the given ``arms_sampled``..
+
+        Throws an exception when arms_sampled is empty.
+        Implementers of this interface will never override this method.
+
+        :return: of set of names of the winning arms
+        :rtype: frozenset(String())
+
+        """
+        if not arms_sampled:
+            raise ValueError('sample_arms is empty!')
+
+        avg_payoff_arm_name_list = []
+        for arm_name, sampled_arm in arms_sampled.iteritems():
+            avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
+            avg_payoff_arm_name_list.append((avg_payoff, arm_name))
+        avg_payoff_arm_name_list.sort(reverse=True)
+
+        best_payoff, _ = max(avg_payoff_arm_name_list)
+        # Filter out arms that have average payoff less than the best payoff
+        winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
+        # Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
+        _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list))
+        winning_arm_names = frozenset(winning_arm_name_list)
+        return winning_arm_names
diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py
index 1cd65704..6e848d73 100644
--- a/moe/bandit/epsilon_first.py
+++ b/moe/bandit/epsilon_first.py
@@ -16,8 +16,8 @@ class EpsilonFirst(Epsilon):
 
     A class to encapsulate the computation of bandit epsilon first.
 
-    total_samples is the total number of samples (#to sample + #sampled)
-    #sampled is calculated by summing up total from each arm sampled.
+    total_samples is the total number of samples (number to sample + number sampled)
+    number sampled is calculated by summing up total from each arm sampled.
     total_samples is T from :doc:`bandit`.
 
     See superclass :class:`moe.bandit.epsilon.Epsilon` for further details.
@@ -32,8 +32,8 @@ def __init__(
     ):
         """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details.
 
-        total_samples is the total number of samples (#to sample + #sampled)
-        #sampled is calculated by summing up total from each arm sampled.
+        total_samples is the total number of samples (number to sample + number sampled)
+        number sampled is calculated by summing up total from each arm sampled.
         total_samples is T from :doc:`bandit`.
 
         """
@@ -57,32 +57,33 @@ def allocate_arms(self):
         If we have a total of T trials, the first :math:`\epsilon` T trials, we only explore.
         After that, we only exploit (t = :math:`\epsilon` T, :math:`\epsilon` T + 1, ..., T).
 
-        In other words, this method will pull a random arm in the exploration phase.
+        This method will pull a random arm in the exploration phase.
         Then this method will pull the optimal arm (best expected return) in the exploitation phase.
 
-        In case of a tie in the exploitation phase, the method will split the probability 1 among the optimal arms.
+        In case of a tie in the exploitation phase, the method will split the allocation among the optimal arms.
 
-        For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 ({win:10, losee:10, total:20})
+        For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5
+        (``{win:10, loss:10, total:20}``)
         and a new arm (arm3, average payoff is 0 and total is 0).
 
         Let the epsilon :math:`\epsilon` be 0.1.
 
         The allocation depends on which phase we are in:
 
-        Case 1: T = 50
+        *Case 1: T = 50*
 
-        Recall that T = #to sample + #sampled. #sampled = 20 + 20 + 0 = 40.
+        Recall that T = number to sample + number sampled. number sampled :math:`= 20 + 20 + 0 = 40`.
         So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials
-        and thus we are in the exploitation phase. We split probability 1 between the optimal arms arm1 and arm2.
+        and thus we are in the exploitation phase. We split the allocation between the optimal arms arm1 and arm2.
 
-        arm1: 0.5, arm2: 0.5, arm3: 0.0.
+        ``{arm1: 0.5, arm2: 0.5, arm3: 0.0}``
 
-        Case 2: T = 500
+        *Case 2: T = 500*
 
         We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials.
         Since we are on trail #41, we are in the exploration phase. We choose arms randomly:
 
-        arm1: 0.33, arm2: 0.33, arm3: 0.33.
+        ``{arm1: 0.33, arm2: 0.33, arm3: 0.33}``
 
         :return: the dictionary of (arm, allocation) key-value pairs
         :rtype: a dictionary of (String(), float64) pairs
@@ -92,7 +93,7 @@ def allocate_arms(self):
         num_arms = self._historical_info.num_arms
 
         if not arms_sampled:
-            raise ValueError('sample_arms are empty!')
+            raise ValueError('sample_arms is empty!')
 
         num_sampled = sum([sampled_arm.total for sampled_arm in arms_sampled.itervalues()])
         # Exploration phase, trials 1,2,..., epsilon * T
@@ -105,18 +106,7 @@ def allocate_arms(self):
             return arms_to_allocations
 
         # Exploitation phase, trials epsilon * T+1, ..., T
-        avg_payoff_arm_name_list = []
-        for arm_name, sampled_arm in arms_sampled.iteritems():
-            avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
-            avg_payoff_arm_name_list.append((avg_payoff, arm_name))
-        avg_payoff_arm_name_list.sort(reverse=True)
-
-        best_payoff, _ = max(avg_payoff_arm_name_list)
-        # Filter out arms that have average payoff less than the best payoff
-        winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
-        # Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
-        _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list))
-        winning_arm_names = frozenset(winning_arm_name_list)
+        winning_arm_names = self._get_winning_arm_names(arms_sampled)
 
         num_winning_arms = len(winning_arm_names)
         arms_to_allocations = {}
diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py
index 2d8843e0..207128f1 100644
--- a/moe/bandit/epsilon_greedy.py
+++ b/moe/bandit/epsilon_greedy.py
@@ -68,18 +68,10 @@ def allocate_arms(self):
         num_arms = self._historical_info.num_arms
         if not arms_sampled:
             raise ValueError('sample_arms are empty!')
-        avg_payoff_arm_name_list = []
-        for arm_name, sampled_arm in arms_sampled.iteritems():
-            avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
-            avg_payoff_arm_name_list.append((avg_payoff, arm_name))
-
-        best_payoff, _ = max(avg_payoff_arm_name_list)
-        # Filter out arms that have average payoff less than the best payoff
-        winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
-        # Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
-        _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list))
-
-        num_winning_arms = len(winning_arm_name_list)
+
+        winning_arm_names = self._get_winning_arm_names(arms_sampled)
+
+        num_winning_arms = len(winning_arm_names)
         epsilon_allocation = self._epsilon / num_arms
         arms_to_allocations = {}
 
@@ -89,7 +81,7 @@ def allocate_arms(self):
 
         # With probability 1-epsilon, split allocation among winning arms.
         winning_arm_allocation = (1.0 - self._epsilon) / num_winning_arms
-        for winning_arm_name in winning_arm_name_list:
+        for winning_arm_name in winning_arm_names:
             arms_to_allocations[winning_arm_name] += winning_arm_allocation
 
         return arms_to_allocations
diff --git a/moe/views/schemas/bandit_pretty_view.py b/moe/views/schemas/bandit_pretty_view.py
index f01b1773..2de0a214 100644
--- a/moe/views/schemas/bandit_pretty_view.py
+++ b/moe/views/schemas/bandit_pretty_view.py
@@ -72,7 +72,7 @@ class BanditEpsilonFirstHyperparameterInfo(base_schemas.StrictMappingSchema):
     **Required fields**
 
     :ivar epsilon: (*0.0 <= float64 <= 1.0*) epsilon value for epsilon-first bandit. This strategy pulls the optimal arm
-      (best expected return) with if it is in exploitation phase (#sampled > epsilon * total_samples). Otherwise a random arm is pulled (exploration).
+      (best expected return) with if it is in exploitation phase (number sampled > epsilon * total_samples). Otherwise a random arm is pulled (exploration).
     :ivar total_samples: total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`.
 
     """

From 77aff65e2b5054ab2da1bfa6dea9d2664c0b719b Mon Sep 17 00:00:00 2001
From: norases <norases@gmail.com>
Date: Fri, 1 Aug 2014 16:19:46 -0700
Subject: [PATCH 5/6] Addressed Eric's comments. Wrote test for static function
 in class Epsilon

---
 CHANGELOG.md                            |  3 ++-
 moe/bandit/epsilon.py                   |  7 +++++--
 moe/bandit/epsilon_first.py             |  4 +---
 moe/bandit/epsilon_greedy.py            |  4 +---
 moe/tests/bandit/__init__.py            |  1 +
 moe/tests/bandit/epsilon_test.py        | 27 +++++++++++++++++++++++++
 moe/views/rest/bandit_epsilon.py        |  6 +++---
 moe/views/schemas/bandit_pretty_view.py |  6 +++---
 8 files changed, 43 insertions(+), 15 deletions(-)
 create mode 100644 moe/tests/bandit/epsilon_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 221273ac..7d9d2ce9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,8 @@
 * Features
 
   * Added multi-armed bandit endpoint. (#255)
-    * Implemented epsilon-greedy.
+    * Implemented epsilon-greedy. (#255)
+    * Implemented epsilon-first. (#335)
   * Added support for the L-BFGS-B optimizer. (#296)
 
 * Changes
diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py
index a07034bc..a0dce0ae 100644
--- a/moe/bandit/epsilon.py
+++ b/moe/bandit/epsilon.py
@@ -32,7 +32,7 @@ def __init__(
         """Construct an Epsilon object.
 
         :param historical_info: a dictionary of arms sampled
-        :type historical_info: dictionary of (String(), SingleArm()) pairs
+        :type historical_info: dictionary of (String(), SampleArm()) pairs (see :class:`moe.bandit.data_containers.SampleArm` for more details)
         :param subtype: subtype of the epsilon bandit algorithm (default: None)
         :type subtype: String()
         :param epsilon: epsilon hyperparameter for the epsilon bandit algorithm (default: :const:`~moe.bandit.constant.DEFAULT_EPSILON`)
@@ -43,12 +43,15 @@ def __init__(
         self._subtype = subtype
         self._epsilon = epsilon
 
-    def _get_winning_arm_names(self, arms_sampled):
+    @staticmethod
+    def get_winning_arm_names(arms_sampled):
         r"""Compute the set of winning arm names based on the given ``arms_sampled``..
 
         Throws an exception when arms_sampled is empty.
         Implementers of this interface will never override this method.
 
+        :param arms_sampled: a dictionary of arm name to :class:`moe.bandit.data_containers.SampleArm`
+        :type arms_sampled: dictionary of (String(), SampleArm()) pairs
         :return: of set of names of the winning arms
         :rtype: frozenset(String())
 
diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py
index 6e848d73..a099f877 100644
--- a/moe/bandit/epsilon_first.py
+++ b/moe/bandit/epsilon_first.py
@@ -4,8 +4,6 @@
 See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit.
 
 """
-import numpy
-
 from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST
 from moe.bandit.epsilon import Epsilon
 
@@ -106,7 +104,7 @@ def allocate_arms(self):
             return arms_to_allocations
 
         # Exploitation phase, trials epsilon * T+1, ..., T
-        winning_arm_names = self._get_winning_arm_names(arms_sampled)
+        winning_arm_names = self.get_winning_arm_names(arms_sampled)
 
         num_winning_arms = len(winning_arm_names)
         arms_to_allocations = {}
diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py
index 207128f1..ffdad037 100644
--- a/moe/bandit/epsilon_greedy.py
+++ b/moe/bandit/epsilon_greedy.py
@@ -4,8 +4,6 @@
 See :class:`moe.bandit.epsilon.Epsilon` for further details on this bandit.
 
 """
-import numpy
-
 from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY
 from moe.bandit.epsilon import Epsilon
 
@@ -69,7 +67,7 @@ def allocate_arms(self):
         if not arms_sampled:
             raise ValueError('sample_arms are empty!')
 
-        winning_arm_names = self._get_winning_arm_names(arms_sampled)
+        winning_arm_names = self.get_winning_arm_names(arms_sampled)
 
         num_winning_arms = len(winning_arm_names)
         epsilon_allocation = self._epsilon / num_arms
diff --git a/moe/tests/bandit/__init__.py b/moe/tests/bandit/__init__.py
index 987476ad..656ba3d7 100644
--- a/moe/tests/bandit/__init__.py
+++ b/moe/tests/bandit/__init__.py
@@ -23,6 +23,7 @@
 * :mod:`moe.tests.bandit.bandit_test_case`: base test case for bandit tests with a simple integration test case
 * :mod:`moe.tests.bandit.epsilon_first_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonFirst`
 * :mod:`moe.tests.bandit.epsilon_greedy_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy`
+* :mod:`moe.tests.bandit.epsilon_test`: tests for :mod:`moe.bandit.epsilon.Epsilon`
 * :mod:`moe.tests.bandit.epsilon_test_case`: test cases for classes under :mod:`moe.bandit.epsilon.Epsilon`
 * :mod:`moe.tests.bandit.linkers_test`: tests for :mod:`moe.bandit.linkers`
 
diff --git a/moe/tests/bandit/epsilon_test.py b/moe/tests/bandit/epsilon_test.py
new file mode 100644
index 00000000..318dc1ac
--- /dev/null
+++ b/moe/tests/bandit/epsilon_test.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""Test epsilon bandit implementation (functions common to epsilon bandit).
+
+Test functions in :class:`moe.bandit.epsilon.Epsilon`
+
+"""
+import testify as T
+
+from moe.bandit.epsilon import Epsilon
+from moe.tests.bandit.epsilon_test_case import EpsilonTestCase
+
+
+class EpsilonTest(EpsilonTestCase):
+
+    """Verify that different sample_arms return correct results."""
+
+    def test_two_new_arms(self):
+        """Check that the two-new-arms case always returns both arms as winning arms. This tests num_winning_arms == num_arms > 1."""
+        T.assert_sets_equal(Epsilon.get_winning_arm_names(self.two_new_arms_test_case.arms_sampled), frozenset(["arm1", "arm2"]))
+
+    def test_three_arms_two_winners(self):
+        """Check that the three-arms cases with two winners return the expected winning arms. This tests num_arms > num_winning_arms > 1."""
+        T.assert_sets_equal(Epsilon.get_winning_arm_names(self.three_arms_two_winners_test_case.arms_sampled), frozenset(["arm1", "arm2"]))
+
+
+if __name__ == "__main__":
+    T.run()
diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py
index e605b815..130c8ea1 100644
--- a/moe/views/rest/bandit_epsilon.py
+++ b/moe/views/rest/bandit_epsilon.py
@@ -53,11 +53,11 @@ def get_params_from_request(self):
         params = super(BanditEpsilonView, self).get_params_from_request()
 
         # colander deserialized results are READ-ONLY. We will potentially be overwriting
-        # fields of ``params['optimizer_info']``, so we need to copy it first.
+        # fields of ``params['hyperparameter_info']``, so we need to copy it first.
         params['hyperparameter_info'] = copy.deepcopy(params['hyperparameter_info'])
 
-        # Find the schma class that corresponds to the ``optimizer_type`` of the request
-        # optimizer_parameters has *not been validated yet*, so we need to validate manually.
+        # Find the schema class that corresponds to the ``subtype`` of the request
+        # hyperparameter_info has *not been validated yet*, so we need to validate manually.
         schema_class = BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES[params['subtype']]()
 
         # Deserialize and validate the parameters
diff --git a/moe/views/schemas/bandit_pretty_view.py b/moe/views/schemas/bandit_pretty_view.py
index 2de0a214..d6c337f4 100644
--- a/moe/views/schemas/bandit_pretty_view.py
+++ b/moe/views/schemas/bandit_pretty_view.py
@@ -73,13 +73,13 @@ class BanditEpsilonFirstHyperparameterInfo(base_schemas.StrictMappingSchema):
 
     :ivar epsilon: (*0.0 <= float64 <= 1.0*) epsilon value for epsilon-first bandit. This strategy pulls the optimal arm
       (best expected return) with if it is in exploitation phase (number sampled > epsilon * total_samples). Otherwise a random arm is pulled (exploration).
-    :ivar total_samples: total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`.
+    :ivar total_samples: (*int >= 0*) total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`.
 
     """
 
     epsilon = colander.SchemaNode(
             colander.Float(),
-            validator=colander.Range(min=0),
+            validator=colander.Range(min=0.0, max=1.0),
             missing=DEFAULT_EPSILON,
             )
 
@@ -103,7 +103,7 @@ class BanditEpsilonGreedyHyperparameterInfo(base_schemas.StrictMappingSchema):
 
     epsilon = colander.SchemaNode(
             colander.Float(),
-            validator=colander.Range(min=0),
+            validator=colander.Range(min=0.0, max=1.0),
             missing=DEFAULT_EPSILON,
             )
 

From ec4c69411e2030fa81435aaf5e72ffe40538f6a2 Mon Sep 17 00:00:00 2001
From: norases <norases@gmail.com>
Date: Fri, 1 Aug 2014 16:36:38 -0700
Subject: [PATCH 6/6] Added ValueError test for Epsilon class and comments
 about raising ValueError.

---
 moe/bandit/epsilon.py            |  3 ++-
 moe/bandit/epsilon_first.py      |  1 +
 moe/bandit/epsilon_greedy.py     |  1 +
 moe/tests/bandit/epsilon_test.py | 16 ++++++++++++++++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py
index a0dce0ae..8de04f6c 100644
--- a/moe/bandit/epsilon.py
+++ b/moe/bandit/epsilon.py
@@ -54,10 +54,11 @@ def get_winning_arm_names(arms_sampled):
         :type arms_sampled: dictionary of (String(), SampleArm()) pairs
         :return: of set of names of the winning arms
         :rtype: frozenset(String())
+        :raise: ValueError when ``arms_sampled`` is empty.
 
         """
         if not arms_sampled:
-            raise ValueError('sample_arms is empty!')
+            raise ValueError('arms_sampled is empty!')
 
         avg_payoff_arm_name_list = []
         for arm_name, sampled_arm in arms_sampled.iteritems():
diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py
index a099f877..d5e48b1c 100644
--- a/moe/bandit/epsilon_first.py
+++ b/moe/bandit/epsilon_first.py
@@ -85,6 +85,7 @@ def allocate_arms(self):
 
         :return: the dictionary of (arm, allocation) key-value pairs
         :rtype: a dictionary of (String(), float64) pairs
+        :raise: ValueError when ``sample_arms`` is empty.
 
         """
         arms_sampled = self._historical_info.arms_sampled
diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py
index ffdad037..d1dfa932 100644
--- a/moe/bandit/epsilon_greedy.py
+++ b/moe/bandit/epsilon_greedy.py
@@ -60,6 +60,7 @@ def allocate_arms(self):
 
         :return: the dictionary of (arm, allocation) key-value pairs
         :rtype: a dictionary of (String(), float64) pairs
+        :raise: ValueError when ``sample_arms`` is empty.
 
         """
         arms_sampled = self._historical_info.arms_sampled
diff --git a/moe/tests/bandit/epsilon_test.py b/moe/tests/bandit/epsilon_test.py
index 318dc1ac..0ba37d57 100644
--- a/moe/tests/bandit/epsilon_test.py
+++ b/moe/tests/bandit/epsilon_test.py
@@ -4,6 +4,8 @@
 Test functions in :class:`moe.bandit.epsilon.Epsilon`
 
 """
+import logging
+
 import testify as T
 
 from moe.bandit.epsilon import Epsilon
@@ -14,6 +16,20 @@ class EpsilonTest(EpsilonTestCase):
 
     """Verify that different sample_arms return correct results."""
 
+    @T.class_setup
+    def disable_logging(self):
+        """Disable logging (for the duration of this test case)."""
+        logging.disable(logging.CRITICAL)
+
+    @T.class_teardown
+    def enable_logging(self):
+        """Re-enable logging (so other test cases are unaffected)."""
+        logging.disable(logging.NOTSET)
+
+    def test_empty_arm_invalid(self):
+        """Test that empty ``sample_arms`` causes a ValueError."""
+        T.assert_raises(ValueError, Epsilon.get_winning_arm_names, {})
+
     def test_two_new_arms(self):
         """Check that the two-new-arms case always returns both arms as winning arms. This tests num_winning_arms == num_arms > 1."""
         T.assert_sets_equal(Epsilon.get_winning_arm_names(self.two_new_arms_test_case.arms_sampled), frozenset(["arm1", "arm2"]))