From 32250d18190f6234fbbeda59f78cfdc2a0e3fe03 Mon Sep 17 00:00:00 2001
From: Philipp Jung <philippjung@posteo.de>
Date: Fri, 5 Jul 2024 11:17:39 +0200
Subject: [PATCH] Make EAR and ENAR handle non-empty error_masks

When implementing the mid-level API, I forgot to add capabilities to
ENAR and EAR to correctly resolve non-empty error_masks.

I changed this now: EAR and ENAR will only draw new errors for cells
that are not already error positions. A ValueError is raised if the
error rate requires more errors than there are error-free cells left.
---
 error_generation/api/mid_level.py         |   2 +-
 error_generation/error_mechanism/_ear.py  |  14 +-
 error_generation/error_mechanism/_ecar.py |   2 +-
 error_generation/error_mechanism/_enar.py |  23 ++-
 error_generation/utils/utils.py           |   2 +-
 samples.ipynb                             | 194 +++++++++++-----------
 6 files changed, 131 insertions(+), 106 deletions(-)

diff --git a/error_generation/api/mid_level.py b/error_generation/api/mid_level.py
index 55708b9..6b796bd 100644
--- a/error_generation/api/mid_level.py
+++ b/error_generation/api/mid_level.py
@@ -27,6 +27,6 @@ def create_errors(table: pd.DataFrame, config: MidLevelConfig) -> tuple[pd.DataF
             old_error_mask = error_mask.copy()
             error_mask = error_mechanism.sample(table, column, error_rate, error_mask)
 
-            series = error_type.apply(table, old_error_mask != error_mask, column)
+            series = error_type.apply(table_dirty, old_error_mask != error_mask, column)
             set_column(table_dirty, column, series)
     return table_dirty, error_mask
diff --git a/error_generation/error_mechanism/_ear.py b/error_generation/error_mechanism/_ear.py
index ceb7d3f..285f192 100644
--- a/error_generation/error_mechanism/_ear.py
+++ b/error_generation/error_mechanism/_ear.py
@@ -35,10 +35,20 @@ def _sample(self: EAR, data: pd.DataFrame, column: str | int, error_rate: float,
         se_mask = get_column(error_mask, column)
         n_errors = int(se_data.size * error_rate)
 
-        upper_bound = len(se_data) - n_errors
+        se_mask_error_free = se_mask[~se_mask]
+        data_column_error_free = data.loc[se_mask_error_free.index, :]
+
+        if len(se_mask_error_free) < n_errors:
+            msg = f"The error rate of {error_rate} requires {n_errors} error-free cells. "
+            msg += f"However, only {len(se_mask_error_free)} error-free cells are available."
+            raise ValueError(msg)
+
+        # We offset the upper bound of lower_error_index by a) the number of existing errors in the column, and b) the number of errors to be generated.
+        upper_bound = len(se_data) - sum(se_mask) - n_errors
         lower_error_index = np.random.default_rng(self.seed).integers(0, upper_bound) if upper_bound > 0 else 0
         error_index_range = range(lower_error_index, lower_error_index + n_errors)
+        selected_rows = data_column_error_free.sort_values(by=condition_to_column).iloc[error_index_range, :]
 
-        se_mask.loc[data.sort_values(by=condition_to_column).index[error_index_range]] = True
+        se_mask.loc[selected_rows.index] = True
 
         return error_mask
diff --git a/error_generation/error_mechanism/_ecar.py b/error_generation/error_mechanism/_ecar.py
index 768ed4a..a90b155 100644
--- a/error_generation/error_mechanism/_ecar.py
+++ b/error_generation/error_mechanism/_ecar.py
@@ -26,7 +26,7 @@ def _sample(self: ECAR, data: pd.DataFrame, column: str | int, error_rate: float
         n_errors = int(se_mask.size * error_rate)
 
         if len(se_mask_error_free) < n_errors:
-            msg = f"The error rate of {error_rate} requires {len(se_mask_error_free)} error-free cells. "
+            msg = f"The error rate of {error_rate} requires {n_errors} error-free cells. "
             msg += f"However, only {len(se_mask_error_free)} error-free cells are available."
             raise ValueError(msg)
 
diff --git a/error_generation/error_mechanism/_enar.py b/error_generation/error_mechanism/_enar.py
index 17cf931..1d90641 100644
--- a/error_generation/error_mechanism/_enar.py
+++ b/error_generation/error_mechanism/_enar.py
@@ -21,17 +21,24 @@ def _sample(self: ENAR, data: pd.DataFrame, column: str | int, error_rate: float
         if self.condition_to_column is not None:
             warnings.warn("'condition_to_column' is set but will be ignored by ENAR.", stacklevel=1)
 
-        # distribute errors equally over all columns
-        n_errors = int(se_data.size * error_rate)
+        n_errors = int(len(se_data) * error_rate)
 
-        if n_errors < len(se_data):  # noqa: SIM108
-            lower_error_index = np.random.default_rng(seed=self.seed).integers(0, len(se_data) - n_errors)
-        else:  # all cells are errors
+        # If the mid-level or high-level API calls ENAR, the error_mask already contains errors. Below we make sure that we only sample rows that do
+        # not already contain errors.
+        se_data_error_free = se_data[~se_mask]
+
+        if len(se_data_error_free) < n_errors:
+            msg = f"The error rate of {error_rate} requires {n_errors} error-free cells. "
+            msg += f"However, only {len(se_data_error_free)} error-free cells are available."
+            raise ValueError(msg)
+
+        if len(se_data_error_free) != n_errors:  # noqa: SIM108
+            lower_error_index = np.random.default_rng(seed=self.seed).integers(0, len(se_data_error_free) - n_errors)
+        else:
             lower_error_index = 0
         error_index_range = range(lower_error_index, lower_error_index + n_errors)
+        selected_rows = se_data_error_free.sort_values().iloc[error_index_range]
 
-        se_mask.loc[se_data.sort_values().index[error_index_range]] = True
-
-        # TODO(PJ): Remember to run if isinstance(seed, int): seed += 1 in mid-level API
+        se_mask.loc[selected_rows.index] = True
 
         return error_mask
diff --git a/error_generation/utils/utils.py b/error_generation/utils/utils.py
index 8fa7e1b..3a3e4e0 100644
--- a/error_generation/utils/utils.py
+++ b/error_generation/utils/utils.py
@@ -63,7 +63,7 @@ class ErrorTypeConfig:
     keyboard_layout: str = "ansi-qwerty"
     error_period: int = 10
 
-    na_value = None
+    na_value: str | None = None
 
     mislabel_weighing: str = "uniform"
     mislabel_weights: dict[Any, float] | None = None
diff --git a/samples.ipynb b/samples.ipynb
index 762201f..0be6a13 100644
--- a/samples.ipynb
+++ b/samples.ipynb
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 3,
    "id": "e4cea319-2cce-4639-8b1e-aa9a8f8d5fdd",
    "metadata": {},
    "outputs": [],
@@ -127,15 +127,15 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>Aservice-01-02-2024</td>\n",
+       "      <td>Aservice-2024-02-01</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Aservice-02-02-2024</td>\n",
+       "      <td>Aservice-2024-02-02</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Aservice-03-02-2024</td>\n",
+       "      <td>Aservice-2024-02-03</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -147,15 +147,15 @@
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>Bservice-2024-02-03</td>\n",
+       "      <td>Bservice-03-02-2024</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>Cservice-2024-02-01</td>\n",
+       "      <td>Cservice-01-02-2024</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>Cservice-2024-02-02</td>\n",
+       "      <td>Cservice-02-02-2024</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -167,14 +167,14 @@
       ],
       "text/plain": [
        "               service\n",
-       "0  Aservice-01-02-2024\n",
-       "1  Aservice-02-02-2024\n",
-       "2  Aservice-03-02-2024\n",
+       "0  Aservice-2024-02-01\n",
+       "1  Aservice-2024-02-02\n",
+       "2  Aservice-2024-02-03\n",
        "3  Bservice-2024-02-01\n",
        "4  Bservice-2024-02-02\n",
-       "5  Bservice-2024-02-03\n",
-       "6  Cservice-2024-02-01\n",
-       "7  Cservice-2024-02-02\n",
+       "5  Bservice-03-02-2024\n",
+       "6  Cservice-01-02-2024\n",
+       "7  Cservice-02-02-2024\n",
        "8  Cservice-2024-02-03"
       ]
      },
@@ -198,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 5,
    "id": "0ef70d60-cf62-4b66-856d-43db6f4a9378",
    "metadata": {},
    "outputs": [
@@ -225,7 +225,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 6,
    "id": "bd456d16-a8cb-4a3d-b496-35c755fd25ac",
    "metadata": {},
    "outputs": [
@@ -258,17 +258,17 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>Alice</td>\n",
-       "      <td>To Kill q Mockingbird</td>\n",
+       "      <td>To Kil; a Mockingbird</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>Alice</td>\n",
-       "      <td>1983</td>\n",
+       "      <td>1i84</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>Alice</td>\n",
-       "      <td>Pride ans Prejudice</td>\n",
+       "      <td>Pride wnd Prejudice</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -291,15 +291,15 @@
       ],
       "text/plain": [
        "  typist              book_title\n",
-       "0  Alice   To Kill q Mockingbird\n",
-       "1  Alice                    1983\n",
-       "2  Alice     Pride ans Prejudice\n",
+       "0  Alice   To Kil; a Mockingbird\n",
+       "1  Alice                    1i84\n",
+       "2  Alice     Pride wnd Prejudice\n",
        "3    Bob        The Great Gatsby\n",
        "4    Bob               Moby-Dick\n",
        "5    Bob  The Catcher in the Rye"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -319,7 +319,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 7,
    "id": "cb40305c-daaa-42fc-bf90-64d4f6d7861d",
    "metadata": {},
    "outputs": [],
@@ -337,7 +337,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 8,
    "id": "41464b03-b38c-4607-92cd-59165d01965d",
    "metadata": {},
    "outputs": [
@@ -370,7 +370,7 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>Alice</td>\n",
-       "      <td>¿Cómo estás?</td>\n",
+       "      <td>Â¿CÃ³mo estÃ¡s?</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -380,12 +380,12 @@
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>Bob</td>\n",
-       "      <td>ä»æ¥ã¯ã©ãã§ãã</td>\n",
+       "      <td>今日はどうですか</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>Bob</td>\n",
-       "      <td>Ça va bien, merci.</td>\n",
+       "      <td>Ãa va bien, merci.</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -395,7 +395,7 @@
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>David</td>\n",
-       "      <td>Ich hÃ¤tte Hunger.</td>\n",
+       "      <td>Ich hätte Hunger.</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -403,15 +403,15 @@
       ],
       "text/plain": [
        "    user                         content\n",
-       "0  Alice                    ¿Cómo estás?\n",
+       "0  Alice                 Â¿CÃ³mo estÃ¡s?\n",
        "1  Alice  ÐÑÐ¸Ð²ÐµÑ, ÐºÐ°Ðº Ð´ÐµÐ»Ð°?\n",
-       "2    Bob        ä»æ¥ã¯ã©ãã§ãã\n",
-       "3    Bob              Ça va bien, merci.\n",
+       "2    Bob                        今日はどうですか\n",
+       "3    Bob             Ãa va bien, merci.\n",
        "4  Clara              ¡Nos vemos mañana!\n",
-       "5  David              Ich hÃ¤tte Hunger."
+       "5  David               Ich hätte Hunger."
       ]
      },
-     "execution_count": 30,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -441,7 +441,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 9,
    "id": "98633b5e-957c-4f2b-813f-208fbed6d855",
    "metadata": {},
    "outputs": [],
@@ -454,7 +454,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 10,
    "id": "a7d421e5-1103-4cde-849b-adb689043081",
    "metadata": {},
    "outputs": [
@@ -510,7 +510,7 @@
        "2  3.0  blau"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -529,7 +529,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 11,
    "id": "dff2611e-b16b-4104-ba1d-4696a18c8330",
    "metadata": {},
    "outputs": [],
@@ -542,7 +542,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 12,
    "id": "1221cb45-2f54-4167-8ebb-26b4f6723555",
    "metadata": {},
    "outputs": [
@@ -609,7 +609,7 @@
        "3  pineapple  pineapple blue  40"
       ]
      },
-     "execution_count": 34,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -628,7 +628,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 13,
    "id": "92c3a871-3078-4552-b9e6-d583e36e2ec2",
    "metadata": {},
    "outputs": [],
@@ -640,7 +640,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 14,
    "id": "288759e4-f634-49d6-a285-deb0e0abf999",
    "metadata": {},
    "outputs": [
@@ -696,7 +696,7 @@
        "2  2  Grnfelder Strae 17, 13357 ppeln"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -715,7 +715,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 15,
    "id": "8f5fa21b-0af8-43ae-9ac4-daa7c09c592a",
    "metadata": {},
    "outputs": [],
@@ -727,7 +727,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 16,
    "id": "5fb3f20d-8e30-47fa-aa8d-aeb541264b81",
    "metadata": {},
    "outputs": [
@@ -760,17 +760,17 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>0</td>\n",
-       "      <td>Entspannujg</td>\n",
+       "      <td>Wntspannung</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>1</td>\n",
-       "      <td>Genigtuung</td>\n",
+       "      <td>Genugtuumg</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>2</td>\n",
-       "      <td>Ausgeglichenbeit</td>\n",
+       "      <td>Ausgeglichemheit</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -778,12 +778,12 @@
       ],
       "text/plain": [
        "   a                 b\n",
-       "0  0       Entspannujg\n",
-       "1  1        Genigtuung\n",
-       "2  2  Ausgeglichenbeit"
+       "0  0       Wntspannung\n",
+       "1  1        Genugtuumg\n",
+       "2  2  Ausgeglichemheit"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -802,7 +802,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 17,
    "id": "53d9ebc6-7e12-4736-9734-babf114fa479",
    "metadata": {},
    "outputs": [],
@@ -814,7 +814,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 18,
    "id": "656082b2-8cac-495a-aa59-eb662888cfc5",
    "metadata": {},
    "outputs": [
@@ -870,7 +870,7 @@
        "2  2  0.06"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -889,7 +889,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 19,
    "id": "6332c825-5cf3-421c-bb2e-f03cb55c34e6",
    "metadata": {},
    "outputs": [],
@@ -902,7 +902,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 20,
    "id": "8f064b77-b988-4c60-8363-42f93c90439c",
    "metadata": {},
    "outputs": [
@@ -958,7 +958,7 @@
        "2  3  gelb"
       ]
      },
-     "execution_count": 42,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -977,7 +977,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 21,
    "id": "5a0b6f34-6d5d-4e3a-9cd3-295fe2b0f1bd",
    "metadata": {},
    "outputs": [],
@@ -989,7 +989,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 22,
    "id": "73e37424-6433-42ce-b85e-58c5510c48c9",
    "metadata": {},
    "outputs": [
@@ -1045,7 +1045,7 @@
        "2  3  None"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1064,7 +1064,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 23,
    "id": "4d2eae7e-8ad5-4f0f-bd15-00f7001da275",
    "metadata": {},
    "outputs": [],
@@ -1076,7 +1076,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 24,
    "id": "e8e3ab8e-19f0-4b1e-9e58-b56771d783d2",
    "metadata": {},
    "outputs": [
@@ -1132,7 +1132,7 @@
        "2  3   11/10 6 p.m."
       ]
      },
-     "execution_count": 46,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1151,7 +1151,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 25,
    "id": "3045944f-f9f1-46e9-8bf4-3d9d56677425",
    "metadata": {},
    "outputs": [],
@@ -1163,7 +1163,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 26,
    "id": "48d3ea05-9670-4b11-856e-7d5678d6db45",
    "metadata": {},
    "outputs": [
@@ -1219,7 +1219,7 @@
        "2  3     6 p.m."
       ]
      },
-     "execution_count": 49,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1238,7 +1238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 27,
    "id": "3b9a33cd-a512-44aa-939b-a51a86f6193d",
    "metadata": {},
    "outputs": [],
@@ -1252,7 +1252,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 28,
    "id": "1a29bbeb-6395-422d-8622-7acbd5cd3c2a",
    "metadata": {},
    "outputs": [
@@ -1319,7 +1319,7 @@
        "3  pineapple  blue pineapple  40.00001"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1361,7 +1361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 29,
    "id": "ff1393bf-2ac6-41bc-a341-a384b74aad5a",
    "metadata": {},
    "outputs": [],
@@ -1385,7 +1385,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 30,
    "id": "63f1e8e0-68ae-4151-8c2b-2c7e69bee3ab",
    "metadata": {},
    "outputs": [
@@ -1417,27 +1417,27 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>Alice</td>\n",
+       "      <td>None</td>\n",
        "      <td>To Kill a Mockingbird</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Alice</td>\n",
-       "      <td>2984</td>\n",
+       "      <td>None</td>\n",
+       "      <td>1o84</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>None</td>\n",
-       "      <td>Pride anr Prejudice</td>\n",
+       "      <td>Prkde and Prejudice</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>None</td>\n",
+       "      <td>Bob</td>\n",
        "      <td>The Great Gatwby</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>None</td>\n",
+       "      <td>Bob</td>\n",
        "      <td>Moby-Dick</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1451,15 +1451,15 @@
       ],
       "text/plain": [
        "  typist              book_title\n",
-       "0  Alice   To Kill a Mockingbird\n",
-       "1  Alice                    2984\n",
-       "2   None     Pride anr Prejudice\n",
-       "3   None        The Great Gatwby\n",
-       "4   None               Moby-Dick\n",
+       "0   None   To Kill a Mockingbird\n",
+       "1   None                    1o84\n",
+       "2   None     Prkde and Prejudice\n",
+       "3    Bob        The Great Gatwby\n",
+       "4    Bob               Moby-Dick\n",
        "5    Bob  The Catcher in the Rye"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1470,7 +1470,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 31,
    "id": "b7553e47-dac9-4a01-aa27-f64f3d5464a2",
    "metadata": {},
    "outputs": [
@@ -1502,12 +1502,12 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>False</td>\n",
+       "      <td>True</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>False</td>\n",
+       "      <td>True</td>\n",
        "      <td>True</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1517,12 +1517,12 @@
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>True</td>\n",
+       "      <td>False</td>\n",
        "      <td>True</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>True</td>\n",
+       "      <td>False</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1536,15 +1536,15 @@
       ],
       "text/plain": [
        "   typist  book_title\n",
-       "0   False       False\n",
-       "1   False        True\n",
+       "0    True       False\n",
+       "1    True        True\n",
        "2    True        True\n",
-       "3    True        True\n",
-       "4    True       False\n",
+       "3   False        True\n",
+       "4   False       False\n",
        "5   False       False"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1552,6 +1552,14 @@
    "source": [
     "error_mask"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6b11375e-e892-48dd-baab-da6581cd002e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {