Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Epsilon-First bandit endpoint #350

Merged
merged 9 commits into from
Aug 1, 2014
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
* Features

* Added multi-armed bandit endpoint. (#255)
* Implemented epsilon-greedy.
* Implemented epsilon-greedy. (#255)
* Implemented epsilon-first. (#335)
* Added support for the L-BFGS-B optimizer. (#296)

* Changes
Expand Down
3 changes: 1 addition & 2 deletions docs/bandit.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ There are many different policies for this problem:

We have implemented the following policies in our package:

* :mod:`~moe.bandit.epsilon_first.EpsilonFirst`
* :mod:`~moe.bandit.epsilon_greedy.EpsilonGreedy`

Other policies include:

* Weighted random choice
* `Epsilon-first`_
* `Epsilon-decreasing`_ \*
* `UCB-exp (Upper Confidence Bound)`_ \*
* `UCB-tuned`_ \*
Expand All @@ -73,7 +73,6 @@ Other policies include:

\* Regret bounded as :math:`t \rightarrow \infty`

.. _Epsilon-first: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies
.. _Epsilon-decreasing: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies
.. _UCB-exp (Upper Confidence Bound): http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf
.. _UCB-tuned: http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf
Expand Down
2 changes: 2 additions & 0 deletions moe/bandit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* :mod:`moe.bandit.constant`: some default configuration values for ``optimal_learning`` components
* :mod:`moe.bandit.data_containers`: :class:`~moe.bandit.data_containers.SampleArm`
and :class:`~moe.bandit.data_containers.HistoricalData` containers for passing data to the ``bandit`` library
* :mod:`moe.bandit.epsilon_first`: :class:`~moe.bandit.epsilon_first.EpsilonFirst`
object for allocating bandit arms and choosing the winning arm based on epsilon-first policy.
* :mod:`moe.bandit.epsilon_greedy`: :class:`~moe.bandit.epsilon_greedy.EpsilonGreedy`
object for allocating bandit arms and choosing the winning arm based on epsilon-greedy policy.
* :mod:`moe.bandit.epsilon`: a base :class:`~moe.bandit.epsilon.Epsilon`
Expand Down
13 changes: 11 additions & 2 deletions moe/bandit/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,19 @@
}
}

# Epsilon subtypes: 'first' explores for the first epsilon*T pulls then
# exploits; 'greedy' explores with probability epsilon on every pull.
EPSILON_SUBTYPE_FIRST = 'first'
EPSILON_SUBTYPE_GREEDY = 'greedy'
EPSILON_SUBTYPES = [
        EPSILON_SUBTYPE_FIRST,
        EPSILON_SUBTYPE_GREEDY,
        ]

# Default Hyperparameters
DEFAULT_EPSILON = 0.05  # fraction of pulls (or probability) spent exploring
DEFAULT_TOTAL_SAMPLES = 100  # default T: number to sample + number sampled
EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS = {
        EPSILON_SUBTYPE_FIRST: {'epsilon': DEFAULT_EPSILON,
                                'total_samples': DEFAULT_TOTAL_SAMPLES},
        EPSILON_SUBTYPE_GREEDY: {'epsilon': DEFAULT_EPSILON},
        }
37 changes: 34 additions & 3 deletions moe/bandit/epsilon.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
See :class:`moe.bandit.interfaces.bandit_interface` for further details on bandit.

"""

import copy

import numpy

from moe.bandit.constant import DEFAULT_EPSILON
from moe.bandit.interfaces.bandit_interface import BanditInterface


class Epsilon(BanditInterface):

r"""Implementation of the constructor of Epsilon. Abstract method allocate_arms implemented in subclass.
r"""Implementation of the constructor and common methods of Epsilon. Abstract method allocate_arms implemented in subclass.

A class to encapsulate the computation of bandit epsilon.
Epsilon is the sole hyperparameter in this class. Subclasses may contain other hyperparameters.
Expand All @@ -31,7 +32,7 @@ def __init__(
"""Construct an Epsilon object.

:param historical_info: a dictionary of arms sampled
:type historical_info: dictionary of (String(), SingleArm()) pairs
:type historical_info: dictionary of (String(), SampleArm()) pairs (see :class:`moe.bandit.data_containers.SampleArm` for more details)
:param subtype: subtype of the epsilon bandit algorithm (default: None)
:type subtype: String()
:param epsilon: epsilon hyperparameter for the epsilon bandit algorithm (default: :const:`~moe.bandit.constant.DEFAULT_EPSILON`)
Expand All @@ -41,3 +42,33 @@ def __init__(
self._historical_info = copy.deepcopy(historical_info)
self._subtype = subtype
self._epsilon = epsilon

@staticmethod
def get_winning_arm_names(arms_sampled):
r"""Compute the set of winning arm names based on the given ``arms_sampled``..

Throws an exception when arms_sampled is empty.
Implementers of this interface will never override this method.

:param arms_sampled: a dictionary of arm name to :class:`moe.bandit.data_containers.SampleArm`
:type arms_sampled: dictionary of (String(), SampleArm()) pairs
:return: of set of names of the winning arms
:rtype: frozenset(String())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:param: and :type: for input

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a :raise: ValueError when ... after rtype

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


"""
if not arms_sampled:
raise ValueError('sample_arms is empty!')

avg_payoff_arm_name_list = []
for arm_name, sampled_arm in arms_sampled.iteritems():
avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
avg_payoff_arm_name_list.append((avg_payoff, arm_name))
avg_payoff_arm_name_list.sort(reverse=True)

best_payoff, _ = max(avg_payoff_arm_name_list)
# Filter out arms that have average payoff less than the best payoff
winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
# Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
_, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list))
winning_arm_names = frozenset(winning_arm_name_list)
return winning_arm_names
117 changes: 117 additions & 0 deletions moe/bandit/epsilon_first.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
"""Classes (Python) to compute the Bandit Epsilon-First arm allocation and choosing the arm to pull next.

See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit.

"""
from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST
from moe.bandit.epsilon import Epsilon


class EpsilonFirst(Epsilon):

    r"""Implementation of EpsilonFirst.

    A class to encapsulate the computation of bandit epsilon first.

    total_samples is the total number of samples (number to sample + number sampled).
    number sampled is calculated by summing up total from each arm sampled.
    total_samples is T from :doc:`bandit`.

    See superclass :class:`moe.bandit.epsilon.Epsilon` for further details.

    """

    def __init__(
            self,
            historical_info,
            epsilon=DEFAULT_EPSILON,
            total_samples=DEFAULT_TOTAL_SAMPLES,
    ):
        """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details.

        :param total_samples: total number of samples T (number to sample + number sampled);
            number sampled is the sum of ``total`` over all sampled arms
            (default: :const:`~moe.bandit.constant.DEFAULT_TOTAL_SAMPLES`)
        :type total_samples: int

        """
        super(EpsilonFirst, self).__init__(
                historical_info=historical_info,
                subtype=EPSILON_SUBTYPE_FIRST,
                epsilon=epsilon,
                )
        self._total_samples = total_samples

    def allocate_arms(self):
        r"""Compute the allocation to each arm given ``historical_info``, running bandit ``subtype`` endpoint with hyperparameters in ``hyperparameter_info``.

        Works with k-armed bandits (k >= 1).

        The Algorithm: http://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions

        This method starts with a pure exploration phase, followed by a pure exploitation phase.
        If we have a total of T trials, the first :math:`\epsilon` T trials, we only explore.
        After that, we only exploit (t = :math:`\epsilon` T, :math:`\epsilon` T + 1, ..., T).

        This method will pull a random arm in the exploration phase.
        Then this method will pull the optimal arm (best expected return) in the exploitation phase.

        In case of a tie in the exploitation phase, the method will split the allocation among the optimal arms.

        For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5
        (``{win:10, lose:10, total:20}``)
        and a new arm (arm3, average payoff is 0 and total is 0).

        Let the epsilon :math:`\epsilon` be 0.1.

        The allocation depends on which phase we are in:

        *Case 1: T = 50*

        Recall that T = number to sample + number sampled. number sampled :math:`= 20 + 20 + 0 = 40`.
        So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials
        and thus we are in the exploitation phase. We split the allocation between the optimal arms arm1 and arm2.

        ``{arm1: 0.5, arm2: 0.5, arm3: 0.0}``

        *Case 2: T = 500*

        We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials.
        Since we are on trial #41, we are in the exploration phase. We choose arms randomly:

        ``{arm1: 0.33, arm2: 0.33, arm3: 0.33}``

        :return: the dictionary of (arm, allocation) key-value pairs
        :rtype: a dictionary of (String(), float64) pairs
        :raise: ValueError when ``arms_sampled`` is empty.

        """
        arms_sampled = self._historical_info.arms_sampled
        num_arms = self._historical_info.num_arms

        if not arms_sampled:
            raise ValueError('sample_arms is empty!')

        # Number of trials already run: the sum of pulls across all arms.
        num_sampled = sum(sampled_arm.total for sampled_arm in arms_sampled.values())

        # Exploration phase, trials 1, 2, ..., epsilon * T:
        # allocate equal probability to every arm.
        if num_sampled < self._total_samples * self._epsilon:
            equal_allocation = 1.0 / num_arms
            return dict((arm_name, equal_allocation) for arm_name in arms_sampled)

        # Exploitation phase, trials epsilon * T + 1, ..., T:
        # split allocation evenly among winning arms, all other arms get 0.
        winning_arm_names = self.get_winning_arm_names(arms_sampled)
        winning_arm_allocation = 1.0 / len(winning_arm_names)
        return dict(
                (arm_name, winning_arm_allocation if arm_name in winning_arm_names else 0.0)
                for arm_name in arms_sampled
                )
20 changes: 5 additions & 15 deletions moe/bandit/epsilon_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
See :class:`moe.bandit.epsilon.Epsilon` for further details on this bandit.

"""
import numpy

from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY
from moe.bandit.epsilon import Epsilon

Expand Down Expand Up @@ -68,18 +66,10 @@ def allocate_arms(self):
num_arms = self._historical_info.num_arms
if not arms_sampled:
raise ValueError('sample_arms are empty!')
avg_payoff_arm_name_list = []
for arm_name, sampled_arm in arms_sampled.iteritems():
avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0
avg_payoff_arm_name_list.append((avg_payoff, arm_name))

best_payoff, _ = max(avg_payoff_arm_name_list)
# Filter out arms that have average payoff less than the best payoff
winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list)
# Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
_, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list))

num_winning_arms = len(winning_arm_name_list)

winning_arm_names = self.get_winning_arm_names(arms_sampled)

num_winning_arms = len(winning_arm_names)
epsilon_allocation = self._epsilon / num_arms
arms_to_allocations = {}

Expand All @@ -89,7 +79,7 @@ def allocate_arms(self):

# With probability 1-epsilon, split allocation among winning arms.
winning_arm_allocation = (1.0 - self._epsilon) / num_winning_arms
for winning_arm_name in winning_arm_name_list:
for winning_arm_name in winning_arm_names:
arms_to_allocations[winning_arm_name] += winning_arm_allocation

return arms_to_allocations
7 changes: 6 additions & 1 deletion moe/bandit/linkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
"""Links between the implementations of bandit algorithms."""
from collections import namedtuple

from moe.bandit.constant import EPSILON_SUBTYPE_GREEDY
from moe.bandit.constant import EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY
from moe.bandit.epsilon_first import EpsilonFirst
from moe.bandit.epsilon_greedy import EpsilonGreedy

# Epsilon
Expand All @@ -16,6 +17,10 @@


EPSILON_SUBTYPES_TO_EPSILON_METHODS = {
EPSILON_SUBTYPE_FIRST: EpsilonMethod(
subtype=EPSILON_SUBTYPE_FIRST,
bandit_class=EpsilonFirst,
),
EPSILON_SUBTYPE_GREEDY: EpsilonMethod(
subtype=EPSILON_SUBTYPE_GREEDY,
bandit_class=EpsilonGreedy,
Expand Down
2 changes: 2 additions & 0 deletions moe/tests/bandit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
**Files in this package**

* :mod:`moe.tests.bandit.bandit_test_case`: base test case for bandit tests with a simple integration test case
* :mod:`moe.tests.bandit.epsilon_first_test`: tests for :mod:`moe.bandit.epsilon_first.EpsilonFirst`
* :mod:`moe.tests.bandit.epsilon_greedy_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy`
* :mod:`moe.tests.bandit.epsilon_test`: tests for :mod:`moe.bandit.epsilon.Epsilon`
* :mod:`moe.tests.bandit.epsilon_test_case`: test cases for classes under :mod:`moe.bandit.epsilon.Epsilon`
* :mod:`moe.tests.bandit.linkers_test`: tests for :mod:`moe.bandit.linkers`

Expand Down
80 changes: 80 additions & 0 deletions moe/tests/bandit/epsilon_first_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
"""Test epsilon-first bandit implementation.

Test default values with one, two, and three arms.
Test one arm with various epsilon values.

"""
import testify as T

from moe.bandit.epsilon_first import EpsilonFirst
from moe.tests.bandit.epsilon_test_case import EpsilonTestCase


class EpsilonFirstTest(EpsilonTestCase):

    """Verify that different epsilon values and historical infos return correct results."""

    # Bandit implementation under test; exercised through the shared
    # EpsilonTestCase fixtures (one_arm_test_case, two_arms_test_case, ...).
    bandit_class = EpsilonFirst

    # Values of T (total number of samples) paired with each epsilon under test.
    total_samples_to_test = [1, 10, 100]

    def test_init_default(self):
        """Verify that default values do not throw an error. This is purely an integration test."""
        self._test_init_default()

    def test_one_arm(self):
        """Check that the one-arm case always returns the given arm as the winning arm and the allocation is 1.0."""
        for epsilon in self.epsilons_to_test:
            for total_samples in self.total_samples_to_test:
                bandit = self.bandit_class(self.one_arm_test_case, epsilon, total_samples)
                T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0})
                T.assert_equal(bandit.choose_arm(), "arm1")

    def test_two_new_arms(self):
        """Check that the two-new-arms case always allocates each arm equally (the allocation is 0.5 for both arms). This tests num_winning_arms == num_arms > 1."""
        for epsilon in self.epsilons_to_test:
            for total_samples in self.total_samples_to_test:
                bandit = self.bandit_class(self.two_new_arms_test_case, epsilon, total_samples)
                T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5})

    def test_two_arms_epsilon_zero(self):
        """Check that the two-arms case with zero epsilon (always exploit) always allocates arm1:1.0 and arm2:0.0 when average payoffs are arm1:1.0 and arm2:0.0."""
        epsilon = 0.0
        bandit = self.bandit_class(self.two_arms_test_case, epsilon)
        T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0})
        T.assert_equal(bandit.choose_arm(), "arm1")

    def test_two_arms_epsilon_one(self):
        """Check that the two-arms case with one epsilon (always explore) always allocates arm1:0.5 and arm2:0.5 when average payoffs are arm1:1.0 and arm2:0.0."""
        epsilon = 1.0
        bandit = self.bandit_class(self.two_arms_test_case, epsilon)
        T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5})

    def test_three_arms_explore(self):
        """Check that the three-arms cases with integer and float payoffs in exploration phase return the expected arm allocations."""
        # epsilon * T = 7 > 6 arms sampled so far, hence still exploring.
        epsilon = 0.7
        total_samples = 10
        equal_allocation = 1.0 / 3
        for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]:
            bandit = self.bandit_class(historical_info, epsilon, total_samples)
            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation})

    def test_three_arms_exploit(self):
        """Check that the three-arms cases with integer and float payoffs in exploitation phase return the expected arm allocations."""
        epsilon = 0.5
        total_samples = 10
        for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]:
            bandit = self.bandit_class(historical_info, epsilon, total_samples)
            T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0})

    def test_three_arms_exploit_two_winners(self):
        """Check that the three-arms cases with two winners in exploitation phase return the expected arm allocations. This tests num_arms > num_winning_arms > 1."""
        epsilon = 0.5
        total_samples = 10
        bandit = self.bandit_class(self.three_arms_two_winners_test_case, epsilon, total_samples)
        T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5, "arm3": 0.0})


# Run this module's tests via the testify runner when executed directly.
if __name__ == "__main__":
    T.run()
Loading