-
Notifications
You must be signed in to change notification settings - Fork 140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Epsilon-First bandit endpoint #350
Changes from 8 commits
a898146
80adc01
a40389f
70e1cf2
2598db8
2d2e7e6
4811dd4
77aff65
ec4c694
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,16 +4,17 @@ | |
See :class:`moe.bandit.interfaces.bandit_interface` for further details on bandit. | ||
|
||
""" | ||
|
||
import copy | ||
|
||
import numpy | ||
|
||
from moe.bandit.constant import DEFAULT_EPSILON | ||
from moe.bandit.interfaces.bandit_interface import BanditInterface | ||
|
||
|
||
class Epsilon(BanditInterface): | ||
|
||
r"""Implementation of the constructor of Epsilon. Abstract method allocate_arms implemented in subclass. | ||
r"""Implementation of the constructor and common methods of Epsilon. Abstract method allocate_arms implemented in subclass. | ||
|
||
A class to encapsulate the computation of bandit epsilon. | ||
Epsilon is the sole hyperparameter in this class. Subclasses may contain other hyperparameters. | ||
|
@@ -31,7 +32,7 @@ def __init__( | |
"""Construct an Epsilon object. | ||
|
||
:param historical_info: a dictionary of arms sampled | ||
:type historical_info: dictionary of (String(), SingleArm()) pairs | ||
:type historical_info: dictionary of (String(), SampleArm()) pairs (see :class:`moe.bandit.data_containers.SampleArm` for more details) | ||
:param subtype: subtype of the epsilon bandit algorithm (default: None) | ||
:type subtype: String() | ||
:param epsilon: epsilon hyperparameter for the epsilon bandit algorithm (default: :const:`~moe.bandit.constant.DEFAULT_EPSILON`) | ||
|
@@ -41,3 +42,33 @@ def __init__( | |
self._historical_info = copy.deepcopy(historical_info) | ||
self._subtype = subtype | ||
self._epsilon = epsilon | ||
|
||
@staticmethod
def get_winning_arm_names(arms_sampled):
    r"""Compute the set of winning arm names based on the given ``arms_sampled``.

    The average payoff of an arm is ``(win - loss) / total``; an arm that has never been
    sampled (``total == 0``) is given an average payoff of 0. Every arm whose average
    payoff equals the best average payoff is a winner, so ties yield more than one name.

    Throws an exception when arms_sampled is empty.
    Implementers of this interface will never override this method.

    :param arms_sampled: a dictionary of arm name to :class:`moe.bandit.data_containers.SampleArm`
    :type arms_sampled: dictionary of (String(), SampleArm()) pairs
    :return: set of names of the winning arms
    :rtype: frozenset(String())
    :raise: ValueError when ``arms_sampled`` is empty

    """
    if not arms_sampled:
        raise ValueError('sample_arms is empty!')

    # ``items()`` (rather than ``iteritems()``) works on both Python 2 and Python 3 dicts.
    avg_payoffs = {}
    for arm_name, sampled_arm in arms_sampled.items():
        if sampled_arm.total > 0:
            avg_payoffs[arm_name] = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total
        else:
            # Unsampled arm: avoid dividing by zero and treat its payoff as 0.
            avg_payoffs[arm_name] = 0

    # A single pass for the maximum is enough; no need to sort the payoffs first.
    best_payoff = max(avg_payoffs.values())
    # Keep every arm that ties for the best payoff.
    return frozenset(arm_name for arm_name, avg_payoff in avg_payoffs.items() if avg_payoff == best_payoff)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Classes (Python) to compute the Bandit Epsilon-First arm allocation and choosing the arm to pull next. | ||
|
||
See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit. | ||
|
||
""" | ||
from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST | ||
from moe.bandit.epsilon import Epsilon | ||
|
||
|
||
class EpsilonFirst(Epsilon):

    r"""Implementation of EpsilonFirst.

    A class to encapsulate the computation of bandit epsilon first.

    total_samples is the total number of samples (number to sample + number sampled)
    number sampled is calculated by summing up total from each arm sampled.
    total_samples is T from :doc:`bandit`.

    See superclass :class:`moe.bandit.epsilon.Epsilon` for further details.

    """

    def __init__(
            self,
            historical_info,
            epsilon=DEFAULT_EPSILON,
            total_samples=DEFAULT_TOTAL_SAMPLES,
    ):
        """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details.

        total_samples is the total number of samples (number to sample + number sampled)
        number sampled is calculated by summing up total from each arm sampled.
        total_samples is T from :doc:`bandit`.

        :param historical_info: the arms sampled so far; ``allocate_arms`` reads its ``arms_sampled`` and ``num_arms`` attributes
        :type historical_info: object exposing ``arms_sampled`` (dictionary of arm name to sampled arm) and ``num_arms``
        :param epsilon: epsilon hyperparameter, the fraction of ``total_samples`` spent exploring (default: ``DEFAULT_EPSILON``)
        :type epsilon: number (presumably a float in [0.0, 1.0]; confirm against the superclass)
        :param total_samples: total number of samples T (number to sample + number sampled) (default: ``DEFAULT_TOTAL_SAMPLES``)
        :type total_samples: number

        """
        super(EpsilonFirst, self).__init__(
            historical_info=historical_info,
            subtype=EPSILON_SUBTYPE_FIRST,
            epsilon=epsilon,
        )
        self._total_samples = total_samples

    def allocate_arms(self):
        r"""Compute the allocation to each arm given ``historical_info``, running bandit ``subtype`` endpoint with hyperparameters in ``hyperparameter_info``.

        Computes the allocation to each arm based on the given subtype, historical info, and hyperparameter info.

        Works with k-armed bandits (k >= 1).

        The Algorithm: http://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions

        This method starts with a pure exploration phase, followed by a pure exploitation phase.
        If we have a total of T trials, the first :math:`\epsilon` T trials, we only explore.
        After that, we only exploit (t = :math:`\epsilon` T, :math:`\epsilon` T + 1, ..., T).

        This method will pull a random arm in the exploration phase.
        Then this method will pull the optimal arm (best expected return) in the exploitation phase.

        In case of a tie in the exploitation phase, the method will split the allocation among the optimal arms.

        For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5
        (``{win:15, loss:5, total:20}``, average payoff being ``(win - loss) / total``)
        and a new arm (arm3, average payoff is 0 and total is 0).

        Let the epsilon :math:`\epsilon` be 0.1.

        The allocation depends on which phase we are in:

        *Case 1: T = 50*

        Recall that T = number to sample + number sampled. number sampled :math:`= 20 + 20 + 0 = 40`.
        So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials
        and thus we are in the exploitation phase. We split the allocation between the optimal arms arm1 and arm2.

        ``{arm1: 0.5, arm2: 0.5, arm3: 0.0}``

        *Case 2: T = 500*

        We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials.
        Since we are on trial #41, we are in the exploration phase. We choose arms randomly:

        ``{arm1: 0.33, arm2: 0.33, arm3: 0.33}``

        :return: the dictionary of (arm, allocation) key-value pairs
        :rtype: a dictionary of (String(), float64) pairs
        :raise: ValueError when ``arms_sampled`` is empty

        """
        arms_sampled = self._historical_info.arms_sampled
        num_arms = self._historical_info.num_arms

        if not arms_sampled:
            raise ValueError('sample_arms is empty!')

        # ``values()`` and plain dict iteration work on both Python 2 and Python 3.
        num_sampled = sum(sampled_arm.total for sampled_arm in arms_sampled.values())

        # Exploration phase, trials 1,2,..., epsilon * T
        # Allocate equal probability to all arms
        if num_sampled < self._total_samples * self._epsilon:
            equal_allocation = 1.0 / num_arms
            return {arm_name: equal_allocation for arm_name in arms_sampled}

        # Exploitation phase, trials epsilon * T+1, ..., T
        winning_arm_names = self.get_winning_arm_names(arms_sampled)
        winning_arm_allocation = 1.0 / len(winning_arm_names)

        # Split allocation among winning arms, all other arms get allocation of 0.
        return {
            arm_name: winning_arm_allocation if arm_name in winning_arm_names else 0.0
            for arm_name in arms_sampled
        }
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Test epsilon-first bandit implementation. | ||
|
||
Test default values with one, two, and three arms. | ||
Test one arm with various epsilon values. | ||
|
||
""" | ||
import testify as T | ||
|
||
from moe.bandit.epsilon_first import EpsilonFirst | ||
from moe.tests.bandit.epsilon_test_case import EpsilonTestCase | ||
|
||
|
||
class EpsilonFirstTest(EpsilonTestCase):

    """Check EpsilonFirst allocations across epsilon values, sample budgets and historical infos."""

    bandit_class = EpsilonFirst

    total_samples_to_test = [1, 10, 100]

    def test_init_default(self):
        """Verify that default values do not throw an error. This is purely an integration test."""
        self._test_init_default()

    def test_one_arm(self):
        """With a single arm, that arm gets allocation 1.0 and is always the chosen arm."""
        expected_allocation = {"arm1": 1.0}
        # Same (epsilon, total_samples) order as nesting the two loops.
        parameter_grid = [(eps, budget) for eps in self.epsilons_to_test for budget in self.total_samples_to_test]
        for eps, budget in parameter_grid:
            bandit = self.bandit_class(self.one_arm_test_case, eps, budget)
            T.assert_dicts_equal(bandit.allocate_arms(), expected_allocation)
            T.assert_equal(bandit.choose_arm(), "arm1")

    def test_two_new_arms(self):
        """Two never-sampled arms are always allocated 0.5 each. This tests num_winning_arms == num_arms > 1."""
        expected_allocation = {"arm1": 0.5, "arm2": 0.5}
        parameter_grid = [(eps, budget) for eps in self.epsilons_to_test for budget in self.total_samples_to_test]
        for eps, budget in parameter_grid:
            bandit = self.bandit_class(self.two_new_arms_test_case, eps, budget)
            T.assert_dicts_equal(bandit.allocate_arms(), expected_allocation)

    def test_two_arms_epsilon_zero(self):
        """With epsilon 0 (always exploit), arm1 (average payoff 1.0) gets everything and arm2 (average payoff 0.0) gets nothing."""
        always_exploit = 0.0
        bandit = self.bandit_class(self.two_arms_test_case, always_exploit)
        allocation = bandit.allocate_arms()
        T.assert_dicts_equal(allocation, {"arm1": 1.0, "arm2": 0.0})
        T.assert_equal(bandit.choose_arm(), "arm1")

    def test_two_arms_epsilon_one(self):
        """With epsilon 1 (always explore), both arms get 0.5 even though their average payoffs are 1.0 and 0.0."""
        always_explore = 1.0
        bandit = self.bandit_class(self.two_arms_test_case, always_explore)
        allocation = bandit.allocate_arms()
        T.assert_dicts_equal(allocation, {"arm1": 0.5, "arm2": 0.5})

    def test_three_arms_explore(self):
        """In the exploration phase, three arms with integer or float payoffs are allocated equally."""
        epsilon = 0.7
        total_samples = 10
        one_third = 1.0 / 3
        expected_allocation = {"arm1": one_third, "arm2": one_third, "arm3": one_third}
        historical_infos = (self.three_arms_test_case, self.three_arms_float_payoffs_test_case)
        for historical_info in historical_infos:
            bandit = self.bandit_class(historical_info, epsilon, total_samples)
            T.assert_dicts_equal(bandit.allocate_arms(), expected_allocation)

    def test_three_arms_exploit(self):
        """In the exploitation phase, three arms with integer or float payoffs give everything to arm1."""
        epsilon = 0.5
        total_samples = 10
        expected_allocation = {"arm1": 1.0, "arm2": 0.0, "arm3": 0.0}
        historical_infos = (self.three_arms_test_case, self.three_arms_float_payoffs_test_case)
        for historical_info in historical_infos:
            bandit = self.bandit_class(historical_info, epsilon, total_samples)
            T.assert_dicts_equal(bandit.allocate_arms(), expected_allocation)

    def test_three_arms_exploit_two_winners(self):
        """In the exploitation phase, two tied winners out of three arms split the allocation. This tests num_arms > num_winning_arms > 1."""
        epsilon = 0.5
        total_samples = 10
        bandit = self.bandit_class(self.three_arms_two_winners_test_case, epsilon, total_samples)
        allocation = bandit.allocate_arms()
        T.assert_dicts_equal(allocation, {"arm1": 0.5, "arm2": 0.5, "arm3": 0.0})


# Run the testify suite when this module is executed directly.
if __name__ == "__main__":
    T.run()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:param: and :type: for input
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed