fix for updating expectations in ucb1 partial fit

Emily Strong · Emily Strong · commit 79ced9e3d544 · 2019-12-17T10:28:19.000-05:00
Signed-off-by: Emily Strong <emily.strong@fmr.com>
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -2,6 +2,13 @@
 MABWiser CHANGELOG
 =====================
 
+
+-------------------------------------------------------------------------------
+December, 17, 2019 1.7.1
+-------------------------------------------------------------------------------
+minor:
+- Bug fix for partial fitting in UCB1
+
 -------------------------------------------------------------------------------
 November, 27, 2019 1.7.0
 -------------------------------------------------------------------------------
diff --git a/dist/mabwiser-1.7.0-py3-none-any.whl b/dist/mabwiser-1.7.0-py3-none-any.whl
diff --git a/dist/mabwiser-1.7.1-py3-none-any.whl b/dist/mabwiser-1.7.1-py3-none-any.whl
diff --git a/mabwiser/mab.py b/mabwiser/mab.py
@@ -5,7 +5,7 @@
 """
 :Author: FMR LLC
 :Email: mabwiser@fmr.com
-:Version: 1.7.0 of November 27, 2019
+:Version: 1.7.1 of December 17, 2019
 
 This module defines the public interface of the **MABWiser Library** providing access to the following modules:
 
@@ -32,7 +32,7 @@
 
 __author__ = "FMR LLC"
 __email__ = "mabwiser@fmr.com"
-__version__ = "1.7.0"
+__version__ = "1.7.1"
 __copyright__ = "Copyright (C) 2019, FMR LLC"
 
 
diff --git a/mabwiser/simulator.py b/mabwiser/simulator.py
@@ -4,7 +4,7 @@
 """
 :Author: FMR LLC
 :Email: mabwiser@fmr.com
-:Version: 1.7.0 of November 27, 2019
+:Version: 1.7.1 of December 17, 2019
 
 This module provides a simulation utility for comparing algorithms and hyper-parameter tuning.
 """
diff --git a/mabwiser/ucb.py b/mabwiser/ucb.py
@@ -63,6 +63,8 @@ def _fit_arm(self, arm: Arm, decisions: np.ndarray, rewards: np.ndarray, context
             self.arm_to_sum[arm] += arm_rewards.sum()
             self.arm_to_count[arm] += arm_rewards.size
             self.arm_to_mean[arm] = self.arm_to_sum[arm] / self.arm_to_count[arm]
+
+        if self.arm_to_count[arm]:
             self.arm_to_expectation[arm] = _UCB1._get_ucb(self.arm_to_mean[arm], self.alpha,
                                                           self.total_count, self.arm_to_count[arm])
 
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
     name="mabwiser",
     description="MABWiser: Parallelizable Contextual Multi-Armed Bandits Library",
     long_description=long_description,
-    version="1.7.0",
+    version="1.7.1",
     author="FMR LLC",
     url="https://github.com/fmr-llc/mabwiser",
     packages=setuptools.find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
diff --git a/tests/test_ucb.py b/tests/test_ucb.py
@@ -310,28 +310,36 @@ def test_partial_fit(self):
 
         mean = mab._imp.arm_to_mean[1]
         ci = mab._imp.arm_to_expectation[1]
+        mean1 = mab._imp.arm_to_mean[2]
+        ci1 = mab._imp.arm_to_expectation[2]
         self.assertAlmostEqual(0.3333333333333333, mean)
         self.assertAlmostEqual(1.5723073962832794, ci)
-
-        mean1 = mab._imp.arm_to_mean[4]
-        ci1 = mab._imp.arm_to_expectation[4]
         self.assertEqual(mean1, 0)
-        self.assertEqual(ci1, 0)
+        self.assertAlmostEqual(ci1, 1.5174271293851465)
+
+        mean2 = mab._imp.arm_to_mean[4]
+        ci2 = mab._imp.arm_to_expectation[4]
+        self.assertEqual(mean2, 0)
+        self.assertEqual(ci2, 0)
 
         # Fit again
         decisions2 = [1, 3, 4]
         rewards2 = [0, 1, 1]
         mab.partial_fit(decisions2, rewards2)
 
-        mean2 = mab._imp.arm_to_mean[1]
-        ci2 = mab._imp.arm_to_expectation[1]
-        mean3 = mab._imp.arm_to_mean[4]
-        ci3 = mab._imp.arm_to_expectation[4]
-
-        self.assertEqual(mean2, 0.25)
-        self.assertAlmostEqual(1.3824639856219572, ci2)
-        self.assertEqual(mean3, 1)
-        self.assertAlmostEqual(3.2649279712439143, ci3)
+        mean3 = mab._imp.arm_to_mean[1]
+        ci3 = mab._imp.arm_to_expectation[1]
+        mean4 = mab._imp.arm_to_mean[4]
+        ci4 = mab._imp.arm_to_expectation[4]
+        mean5 = mab._imp.arm_to_mean[2]
+        ci5 = mab._imp.arm_to_expectation[2]
+
+        self.assertEqual(mean3, 0.25)
+        self.assertAlmostEqual(1.3824639856219572, ci3)
+        self.assertEqual(mean4, 1)
+        self.assertAlmostEqual(3.2649279712439143, ci4)
+        self.assertEqual(mean5, 0)
+        self.assertAlmostEqual(ci5, 1.6015459273656616)
 
     def test_add_arm(self):