diff --git a/master/__pycache__/hooks.cpython-39.pyc b/master/__pycache__/hooks.cpython-39.pyc index 5eeaac3..9a3d317 100644 Binary files a/master/__pycache__/hooks.cpython-39.pyc and b/master/__pycache__/hooks.cpython-39.pyc differ diff --git a/master/internals/index.html b/master/internals/index.html index b435736..3d73c1b 100644 --- a/master/internals/index.html +++ b/master/internals/index.html @@ -6082,9 +6082,7 @@

716 717 718 -719 -720 -721
class Emulator:
+719
class Emulator:
     """Class used to interact with an emulator and type word on a given keyboard.
 
     Args:
@@ -6419,9 +6417,7 @@ 

""" screen_data = self.driver.get_screenshot_as_png() screen = np.asarray(Image.open(io.BytesIO(screen_data))) - return cv2.resize( - screen, (self.screen_size["width"], self.screen_size["height"]), interpolation=cv2.INTER_AREA - ) + return screen.copy() def get_predictions(self, lang: str = "en") -> List[str]: """Retrieve the predictions displayed by the keyboard. @@ -7142,7 +7138,9 @@

Source code in kebbie/emulator.py -
615
+              
613
+614
+615
 616
 617
 618
@@ -7169,9 +7167,7 @@ 

639 640 641 -642 -643 -644

def get_predictions(self, lang: str = "en") -> List[str]:
+642
def get_predictions(self, lang: str = "en") -> List[str]:
     """Retrieve the predictions displayed by the keyboard.
 
     Args:
@@ -7248,7 +7244,9 @@ 

Source code in kebbie/emulator.py -
654
+              
652
+653
+654
 655
 656
 657
@@ -7260,9 +7258,7 @@ 

663 664 665 -666 -667 -668

def get_text(self) -> str:
+666
def get_text(self) -> str:
     """Return the text currently contained in the typing field.
 
     This method is just a wrapper around `_get_text()`, making sure the
@@ -7299,7 +7295,9 @@ 

Source code in kebbie/emulator.py -
670
+              
668
+669
+670
 671
 672
 673
@@ -7329,9 +7327,7 @@ 

697 698 699 -700 -701 -702

def show_keyboards(self):
+700
def show_keyboards(self):
     """Take a screenshot and overlay the given layout, for debugging the
     position of each keys.
     """
@@ -7472,7 +7468,9 @@ 

Source code in kebbie/emulator.py -
724
+                
722
+723
+724
 725
 726
 727
@@ -7628,59 +7626,7 @@ 

877 878 879 -880 -881 -882 -883 -884 -885 -886 -887 -888 -889 -890 -891 -892 -893 -894 -895 -896 -897 -898 -899 -900 -901 -902 -903 -904 -905 -906 -907 -908 -909 -910 -911 -912 -913 -914 -915 -916 -917 -918 -919 -920 -921 -922 -923 -924 -925 -926 -927 -928 -929 -930 -931 -932

class LayoutDetector:
+880
class LayoutDetector:
     """Base class for auto-detection of the keyboard layout.
 
     To auto-detect a new keyboard, create a new sub-class, and overwite
@@ -7729,11 +7675,7 @@ 

# Reset out keyboard to the original layer self.tap(layout["numbers"]["letters"], layout["keyboard_frame"]) - # Fix the keys' offset compared to the keyboard frame - if self.android: - self.layout = self._apply_status_bar_offset(layout) - else: - self.layout = layout + self.layout = layout def get_suggestions(self) -> List[str]: """Method to retrieve the keyboard suggestions from the XML tree. @@ -7843,52 +7785,6 @@

return CONTENT_TO_RENAME[content] else: return content - - def _get_status_bar_bounds(self) -> List[int]: - """For layout detection, this method retrieve the bounds of the status - bar from the XML tree. - - Returns: - Bounds of the status bar. - """ - sb = self.driver.find_element(By.ID, "com.android.systemui:id/status_bar") - return self._get_frame(sb) - - def _apply_status_bar_offset(self, layout: Dict) -> Dict: - """Method offsetting the given layout to match the screen. - - On Android, somehow the detected positions for the keys aren't matching - what we see on screen. This is because of the status bar, which shift - everything. So, detect the status bar, and shift back the keys to the - right position. - - Args: - layout (Dict): Layout to fix. - - Returns: - Fixed layout. - """ - sb_bounds = self._get_status_bar_bounds() - dy = sb_bounds[3] - screen_size = layout["keyboard_frame"][1] + layout["keyboard_frame"][3] - - # First of all, offset the keyboard frame - frame_dy1 = int(dy * (layout["keyboard_frame"][1] / screen_size)) - frame_dy2 = int(dy * ((layout["keyboard_frame"][1] + layout["keyboard_frame"][3]) / screen_size)) - layout["keyboard_frame"][1] -= frame_dy1 - layout["keyboard_frame"][3] -= frame_dy2 - frame_dy1 - - # Then do the same for each keys of each layouts - for layer in ["lowercase", "uppercase", "numbers"]: - for k in layout[layer]: - dy1 = int(dy * ((layout["keyboard_frame"][1] + layout[layer][k][1]) / screen_size)) - dy2 = int( - dy * ((layout["keyboard_frame"][1] + layout[layer][k][1] + layout[layer][k][3]) / screen_size) - ) - layout[layer][k][1] -= dy1 - frame_dy1 - layout[layer][k][3] -= dy2 - dy1 - - return layout

@@ -7969,7 +7865,13 @@

Source code in kebbie/emulator.py -
779
+              
773
+774
+775
+776
+777
+778
+779
 780
 781
 782
@@ -7977,13 +7879,7 @@ 

784 785 786 -787 -788 -789 -790 -791 -792 -793

def get_suggestions(self) -> List[str]:
+787
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Note that it's slower to access the XML through methods like
@@ -8033,43 +7929,43 @@ 

Source code in kebbie/emulator.py -
935
-936
-937
-938
-939
-940
-941
-942
-943
-944
-945
-946
-947
-948
-949
-950
-951
-952
-953
-954
-955
-956
-957
-958
-959
-960
-961
-962
-963
-964
-965
-966
-967
-968
-969
-970
-971
class GboardLayoutDetector(LayoutDetector):
+                
883
+884
+885
+886
+887
+888
+889
+890
+891
+892
+893
+894
+895
+896
+897
+898
+899
+900
+901
+902
+903
+904
+905
+906
+907
+908
+909
+910
+911
+912
+913
+914
+915
+916
+917
+918
+919
class GboardLayoutDetector(LayoutDetector):
     """Layout detector for the Gboard keyboard. See `LayoutDetector` for more
     information.
     """
@@ -8159,30 +8055,30 @@ 

Source code in kebbie/emulator.py -
948
-949
-950
-951
-952
-953
-954
-955
-956
-957
-958
-959
-960
-961
-962
-963
-964
-965
-966
-967
-968
-969
-970
-971
def get_suggestions(self) -> List[str]:
+              
896
+897
+898
+899
+900
+901
+902
+903
+904
+905
+906
+907
+908
+909
+910
+911
+912
+913
+914
+915
+916
+917
+918
+919
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Returns:
@@ -8241,45 +8137,45 @@ 

Source code in kebbie/emulator.py -
 974
- 975
- 976
- 977
- 978
- 979
- 980
- 981
- 982
- 983
- 984
- 985
- 986
- 987
- 988
- 989
- 990
- 991
- 992
- 993
- 994
- 995
- 996
- 997
- 998
- 999
-1000
-1001
-1002
-1003
-1004
-1005
-1006
-1007
-1008
-1009
-1010
-1011
-1012
class IosLayoutDetector(LayoutDetector):
+                
922
+923
+924
+925
+926
+927
+928
+929
+930
+931
+932
+933
+934
+935
+936
+937
+938
+939
+940
+941
+942
+943
+944
+945
+946
+947
+948
+949
+950
+951
+952
+953
+954
+955
+956
+957
+958
+959
+960
class IosLayoutDetector(LayoutDetector):
     """Layout detector for the iOS default keyboard. See `LayoutDetector` for
     more information.
     """
@@ -8371,42 +8267,42 @@ 

Source code in kebbie/emulator.py -
 988
- 989
- 990
- 991
- 992
- 993
- 994
- 995
- 996
- 997
- 998
- 999
-1000
-1001
-1002
-1003
-1004
-1005
-1006
-1007
-1008
-1009
-1010
-1011
-1012
def get_suggestions(self) -> List[str]:
-    """Method to retrieve the keyboard suggestions from the XML tree.
-
-    Returns:
-        List of suggestions from the keyboard.
-    """
-    suggestions = []
-
-    sections = [
-        data for data in self.driver.page_source.split("<XCUIElementTypeOther") if "name=" in data.split(">")[0]
-    ]
-    is_typing_predictions_section = False
+              
936
+937
+938
+939
+940
+941
+942
+943
+944
+945
+946
+947
+948
+949
+950
+951
+952
+953
+954
+955
+956
+957
+958
+959
+960
def get_suggestions(self) -> List[str]:
+    """Method to retrieve the keyboard suggestions from the XML tree.
+
+    Returns:
+        List of suggestions from the keyboard.
+    """
+    suggestions = []
+
+    sections = [
+        data for data in self.driver.page_source.split("<XCUIElementTypeOther") if "name=" in data.split(">")[0]
+    ]
+    is_typing_predictions_section = False
     for section in sections:
         m = re.search(r"name=\"([^\"]*)\"", section)
         if m:
@@ -8455,41 +8351,41 @@ 

Source code in kebbie/emulator.py -
1015
-1016
-1017
-1018
-1019
-1020
-1021
-1022
-1023
-1024
-1025
-1026
-1027
-1028
-1029
-1030
-1031
-1032
-1033
-1034
-1035
-1036
-1037
-1038
-1039
-1040
-1041
-1042
-1043
-1044
-1045
-1046
-1047
-1048
-1049
class KbkitproLayoutDetector(LayoutDetector):
+                
963
+964
+965
+966
+967
+968
+969
+970
+971
+972
+973
+974
+975
+976
+977
+978
+979
+980
+981
+982
+983
+984
+985
+986
+987
+988
+989
+990
+991
+992
+993
+994
+995
+996
+997
class KbkitproLayoutDetector(LayoutDetector):
     """Layout detector for the KeyboardKit Pro demo keyboard. See
     `LayoutDetector` for more information.
     """
@@ -8577,27 +8473,27 @@ 

Source code in kebbie/emulator.py -
1029
-1030
-1031
-1032
-1033
-1034
-1035
-1036
-1037
-1038
-1039
-1040
-1041
-1042
-1043
-1044
-1045
-1046
-1047
-1048
-1049
def get_suggestions(self) -> List[str]:
+              
977
+978
+979
+980
+981
+982
+983
+984
+985
+986
+987
+988
+989
+990
+991
+992
+993
+994
+995
+996
+997
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Returns:
@@ -8653,38 +8549,38 @@ 

Source code in kebbie/emulator.py -
1052
-1053
-1054
-1055
-1056
-1057
-1058
-1059
-1060
-1061
-1062
-1063
-1064
-1065
-1066
-1067
-1068
-1069
-1070
-1071
-1072
-1073
-1074
-1075
-1076
-1077
-1078
-1079
-1080
-1081
-1082
-1083
class KbkitossLayoutDetector(LayoutDetector):
+                
1000
+1001
+1002
+1003
+1004
+1005
+1006
+1007
+1008
+1009
+1010
+1011
+1012
+1013
+1014
+1015
+1016
+1017
+1018
+1019
+1020
+1021
+1022
+1023
+1024
+1025
+1026
+1027
+1028
+1029
+1030
+1031
class KbkitossLayoutDetector(LayoutDetector):
     """Layout detector for the KeyboardKit OSS demo keyboard. See
     `LayoutDetector` for more information.
     """
@@ -8769,24 +8665,24 @@ 

Source code in kebbie/emulator.py -
1066
-1067
-1068
-1069
-1070
-1071
-1072
-1073
-1074
-1075
-1076
-1077
-1078
-1079
-1080
-1081
-1082
-1083
def get_suggestions(self) -> List[str]:
+              
1014
+1015
+1016
+1017
+1018
+1019
+1020
+1021
+1022
+1023
+1024
+1025
+1026
+1027
+1028
+1029
+1030
+1031
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Returns:
@@ -8839,38 +8735,38 @@ 

Source code in kebbie/emulator.py -
1086
-1087
-1088
-1089
-1090
-1091
-1092
-1093
-1094
-1095
-1096
-1097
-1098
-1099
-1100
-1101
-1102
-1103
-1104
-1105
-1106
-1107
-1108
-1109
-1110
-1111
-1112
-1113
-1114
-1115
-1116
-1117
class SwiftkeyLayoutDetector(LayoutDetector):
+                
1034
+1035
+1036
+1037
+1038
+1039
+1040
+1041
+1042
+1043
+1044
+1045
+1046
+1047
+1048
+1049
+1050
+1051
+1052
+1053
+1054
+1055
+1056
+1057
+1058
+1059
+1060
+1061
+1062
+1063
+1064
+1065
class SwiftkeyLayoutDetector(LayoutDetector):
     """Layout detector for the Swiftkey keyboard. See `LayoutDetector` for more
     information.
     """
@@ -8955,25 +8851,25 @@ 

Source code in kebbie/emulator.py -
1099
-1100
-1101
-1102
-1103
-1104
-1105
-1106
-1107
-1108
-1109
-1110
-1111
-1112
-1113
-1114
-1115
-1116
-1117
def get_suggestions(self) -> List[str]:
+              
1047
+1048
+1049
+1050
+1051
+1052
+1053
+1054
+1055
+1056
+1057
+1058
+1059
+1060
+1061
+1062
+1063
+1064
+1065
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Returns:
@@ -9027,39 +8923,39 @@ 

Source code in kebbie/emulator.py -
1120
-1121
-1122
-1123
-1124
-1125
-1126
-1127
-1128
-1129
-1130
-1131
-1132
-1133
-1134
-1135
-1136
-1137
-1138
-1139
-1140
-1141
-1142
-1143
-1144
-1145
-1146
-1147
-1148
-1149
-1150
-1151
-1152
class TappaLayoutDetector(LayoutDetector):
+                
1068
+1069
+1070
+1071
+1072
+1073
+1074
+1075
+1076
+1077
+1078
+1079
+1080
+1081
+1082
+1083
+1084
+1085
+1086
+1087
+1088
+1089
+1090
+1091
+1092
+1093
+1094
+1095
+1096
+1097
+1098
+1099
+1100
class TappaLayoutDetector(LayoutDetector):
     """Layout detector for the Tappa keyboard. See `LayoutDetector` for more
     information.
     """
@@ -9145,26 +9041,26 @@ 

Source code in kebbie/emulator.py -
1133
-1134
-1135
-1136
-1137
-1138
-1139
-1140
-1141
-1142
-1143
-1144
-1145
-1146
-1147
-1148
-1149
-1150
-1151
-1152
def get_suggestions(self) -> List[str]:
+              
1081
+1082
+1083
+1084
+1085
+1086
+1087
+1088
+1089
+1090
+1091
+1092
+1093
+1094
+1095
+1096
+1097
+1098
+1099
+1100
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Returns:
@@ -9222,54 +9118,54 @@ 

Source code in kebbie/emulator.py -
1155
-1156
-1157
-1158
-1159
-1160
-1161
-1162
-1163
-1164
-1165
-1166
-1167
-1168
-1169
-1170
-1171
-1172
-1173
-1174
-1175
-1176
-1177
-1178
-1179
-1180
-1181
-1182
-1183
-1184
-1185
-1186
-1187
-1188
-1189
-1190
-1191
-1192
-1193
-1194
-1195
-1196
-1197
-1198
-1199
-1200
-1201
-1202
class FleksyLayoutDetector(LayoutDetector):
+                
1103
+1104
+1105
+1106
+1107
+1108
+1109
+1110
+1111
+1112
+1113
+1114
+1115
+1116
+1117
+1118
+1119
+1120
+1121
+1122
+1123
+1124
+1125
+1126
+1127
+1128
+1129
+1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+1149
+1150
class FleksyLayoutDetector(LayoutDetector):
     """Layout detector for the Fleksy keyboard. See `LayoutDetector` for more
     information.
 
@@ -9370,27 +9266,27 @@ 

Source code in kebbie/emulator.py -
1182
-1183
-1184
-1185
-1186
-1187
-1188
-1189
-1190
-1191
-1192
-1193
-1194
-1195
-1196
-1197
-1198
-1199
-1200
-1201
-1202
def get_suggestions(self) -> List[str]:
+              
1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+1149
+1150
def get_suggestions(self) -> List[str]:
     """Method to retrieve the keyboard suggestions from the XML tree.
 
     Returns:
diff --git a/master/search/search_index.json b/master/search/search_index.json
index 54cd857..c15aefa 100644
--- a/master/search/search_index.json
+++ b/master/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Kebbie","text":""},{"location":"#introduction","title":"Introduction","text":"

Welcome to the documentation of the kebbie package.

kebbie is a small framework for testing and benchmarking mobile keyboards. The primary goal of this package is to establish a cohesive and standardized method for evaluating the various NLP capabilities of a mobile keyboard and comparing them to existing alternatives.

This is achieved through two features offered by kebbie :

  • An easy-to-use evaluation function that facilitates the testing of multiple NLP functionalities offered by a mobile keyboard : auto-correction, auto-completion, next-word prediction, and swipe gesture recognition.
  • A command-line interface for running the evaluation on established keyboards, operated within emulator.
"},{"location":"#installation","title":"Installation","text":""},{"location":"#latest-version","title":"Latest version","text":"

You can install the latest version of the package directly from PyPi with :

pip install kebbie\n

Hint

If you want to install directly from Github, run :

pip install git+https://github.com/FleksySDK/kebbie.git\n

"},{"location":"#specific-version","title":"Specific version","text":"

You can install a specific version of the package (0.1.0 in ths example) from PyPi with :

pip install kebbie==0.1.0\n

Hint

If you want to install directly from Github, run :

pip install git+https://github.com/FleksySDK/kebbie.git@v0.1.0\n

"},{"location":"#local","title":"Local","text":"

You can also clone the repository locally and install it manually :

git clone https://github.com/FleksySDK/kebbie.git\ncd kebbie\npip install -e .\n
"},{"location":"#extra-dependencies","title":"Extra dependencies","text":"

You can also install extras dependencies, for example :

pip install -e .[docs]\n

Will install necessary dependencies for building the docs.

Hint

If you installed the package directly from github, run :

pip install \"kebbie[docs] @ git+https://github.com/FleksySDK/kebbie.git\"\n

List of extra dependencies :

  • test : Dependencies for running unit-tests.
  • hook : Dependencies for running pre-commit hooks.
  • lint : Dependencies for running linters and formatters.
  • docs : Dependencies for building the documentation.
  • dev : test + hook + lint + docs.
  • all : All extra dependencies.
"},{"location":"#contribute","title":"Contribute","text":"

To contribute, install the package locally (see Installation), create your own branch, add your code (and tests, and documentation), and open a PR !

"},{"location":"#pre-commit-hooks","title":"Pre-commit hooks","text":"

Pre-commit hooks are set to check the code added whenever you commit something.

When you try to commit your code, hooks are automatically run, and if you code does not meet the quality required by linters, it will not be committed. You then have to fix your code and try to commit again !

Important

If you never ran the hooks before, install it with :

pip install -e .[hook]\npre-commit install\n

Info

You can manually run the pre-commit hooks with :

pre-commit run --all-files\n

"},{"location":"#unit-tests","title":"Unit-tests","text":"

When you contribute, you need to make sure all the unit-tests pass. You should also add tests if necessary !

Info

Install the dependencies for testing with :

pip install -e .[test]\n

You can run the tests with :

pytest\n

Info

Tests are not included in the pre-commit hooks, because running the tests might be slow, and for the sake of developers we want the pre-commit hooks to be fast !

Info

Pre-commit hooks will not run the tests, but it will automatically update the coverage badge !

"},{"location":"#documentation","title":"Documentation","text":"

When you contribute, make sure to keep the documentation up-to-date.

You can visualize the documentation locally by running :

mkdocs serve\n

Info

Before running this command, you need to install the documentation dependencies :

pip install -e .[docs]\n

"},{"location":"architecture/","title":"Architecture","text":"

This page presents the internals and design decisions of the kebbie package.

"},{"location":"architecture/#the-oracle","title":"The Oracle","text":"

The Oracle is the main class of the package.

It's the class that takes care of iterating the dataset, introducing the artifical typos, and calling the given Corrector with the noisy text. Then it scores the results, knowing what was the expected text, and return the aggregated metrics as a result.

Performances

The task is embarassingly parallel. Each sentence can be tested separately. The Oracle leverages multiprocessing to ensure we run the tests as fast as possible.

Reproducibility

Although The Oracle runs in parallel, the evaluation is entirely reproducible and deterministic. Running twice the same evaluation (with the same Corrector and the same parameters) should give you the exact same results.

If you follow the flow of the data, this is what it looks like :

"},{"location":"architecture/#the-noise-model","title":"The Noise Model","text":"

The NoiseModel is the class responsible for introducing artificial typos in a clean text.

This is done in two steps :

  • From a clean word, create a noisy equivalent, which corresponds to a \"cognitive\" typo (i.e. the user might not know the exact spelling of the word)
  • Then from this noisy word, we type each character one by one in a fuzzy way (might type the character next to the intended character), which corresponds to a \"physical\" typo (i.e. fat finger syndrome)

Info

The keystrokes are generated by using two Gaussian distributions (over the X-axis and the Y-axis), centered on the middle of the intended key.

In the end, the output is a noisy version of the word, alongside with the corresponding keystrokes coordinates.

"},{"location":"emu_setup/","title":"Emulator setup","text":""},{"location":"emu_setup/#installing-appium-20","title":"Installing Appium 2.0","text":"

Appium is required to communicate between Python and the emulators.

Install Appium 2.0 by following their official documentation.

Then install the required drivers :

# For Android\nappium driver install uiautomator2\n\n# For iOS\nappium driver install xcuitest\n

To start Appium, open a new terminal and type :

appium\n

Note

Once it's running, don't close the terminal. Appium needs to run in order for Python to communicate with the emulators.

"},{"location":"emu_setup/#setting-up-android-emulator","title":"Setting up Android emulator","text":""},{"location":"emu_setup/#creating-the-emulator","title":"Creating the emulator","text":"
  • Install Android Studio
  • Create a new virtual device
  • Select the phone (Pixel 2 for example) and the system image (Tiramisu - Android 13.0 for example)
"},{"location":"emu_setup/#starting-the-emulator","title":"Starting the emulator","text":"

Once you have created the emulator, you should be able to see its name from the command line :

emulator -list-avds\n
If you encounter command not found: emulator

If the command fails with command not found: emulator, you need to update your path accordingly :

export ANDROID_HOME=/Users/<username>/Library/Android/sdk\nexport PATH=$ANDROID_HOME/platform-tools:$ANDROID_HOME/emulator:$PATH\n

You can start the emulator directly from the command line with : (so you don't need to run Android Studio, which takes a lot of resources)

emulator -avd <name> -no-snapshot-load\n

Once started, make sure you can see it. From another terminal, run :

adb devices\n
If you encounter command not found: adb

If the command fails with command not found: adb, you need to update your path accordingly :

export ANDROID_HOME=/Users/<username>/Library/Android/sdk\nexport PATH=$ANDROID_HOME/platform-tools:$ANDROID_HOME/emulator:$PATH\n

Info

In Android, to open the keyboard, we access a notepad website (www.justnotepad.com).

The reason we do that is because it's the easiest way to access a typing field, and it works across versions and emulators.

"},{"location":"emu_setup/#preparing-gboard","title":"Preparing GBoard","text":"

GBoard is enabled by default on Android, so there is nothing to do.

Tip

You can make sure GBoard is indeed the selected keyboard by going to the Settings -> System -> Languages & Input -> On-screen keyboard.

By default, GBoard has the clipboard enabled, and it may interfere with the layout detection. You can disable the clipboard in the settings of GBoard :

Make sure to disable the clipboard :

Layout

For now, the only layout supported is english US. Make sure this is the layout GBoard is using.

"},{"location":"emu_setup/#preparing-swiftkey","title":"Preparing Swiftkey","text":"

Swiftkey keyboard isn't installed on the emulator by default : you need to install it first.

Note

If you want to run the tests in parallel on several emulators, you need to repeat these steps for each emulator.

Start the emulator, then go to Google, and paste this link to install Swiftkey.

Tip

If the clipboard isn't shared with the emulator, open a terminal and run :

adb shell input text \"https://play.google.com/store/apps/details?id=com.touchtype.swiftkey&hl=en_US&gl=US\"\n

Install the keyboard on your emulator :

Open the app, follow the instructions to activate the keyboard.

By default, Swiftkey has the clipboard enabled, and it may interfere with the layout detection. You can disable the clipboard. First, access the clipboard settings :

And disable the clipboard suggestions :

"},{"location":"emu_setup/#setting-up-ios-emulator","title":"Setting up iOS emulator","text":""},{"location":"emu_setup/#creating-the-emulator_1","title":"Creating the emulator","text":"
  • Install XCode
  • Open WebDriverAgent in Xcode :
    open ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent/WebDriverAgent.xcodeproj\n
  • Go to Signing & Capabilities of the project :
  • Then click \"Team\" and select your Apple ID
  • You should do this for the three following targets : WebDriverAgentLib, WebDriverAgentRunner, IntegrationApp.

Now, make sure you can properly build the WebDriverAgentRunner target : select it in the top bar and run it (button \"play\") :

If all the stars are aligned, it should start the emulator !

"},{"location":"emu_setup/#starting-the-emulator_1","title":"Starting the emulator","text":"

Once you have ensured the emulator runs properly, you should be able to start it from the command line (without Xcode open).

First, check the list of emulators available :

xcrun simctl list\n

Example of emulators listed :

-- iOS 17.4 --\n    iPhone SE (3rd generation) (96ADAD77-ECE6-420E-B56C-505E0C16231B) (Shutdown)\n    iPhone 15 (128F95FC-F499-4B09-A3B2-55937BF52B0B) (Shutdown)\n    iPhone 15 Plus (86591FC6-B3E7-43A2-9E9B-D4A2A90DAF31) (Shutdown)\n    iPhone 15 Pro (9D38F87D-273B-4D8F-8AD5-E901C1974C1E) (Shutdown)\n    iPhone 15 Pro Max (15EF57B4-69E6-4369-9534-70692A2023E5) (Shutdown)\n    iPad Air (5th generation) (252D522B-CEAA-4085-BE17-A453BC219755) (Shutdown)\n    iPad (10th generation) (39F2ADD2-2FCF-44C3-9DC9-4CC4D50875E9) (Shutdown)\n    iPad mini (6th generation) (59125B84-4ED1-40C1-8457-3CE824394385) (Shutdown)\n    iPad Pro (11-inch) (4th generation) (DB122D71-F358-48DA-B11C-D25305657E7F) (Shutdown)\n    iPad Pro (12.9-inch) (6th generation) (1100927A-B631-4678-AB19-02EA4F680537) (Shutdown)\n

Then you can start the device you want with :

xcrun simctl boot <UUID>\n

For example, to start iPhone 15 Pro, you should run :

xcrun simctl boot 9D38F87D-273B-4D8F-8AD5-E901C1974C1E\n

Warning

The xcrun simctl boot command only launch the simulator background service, to launch the foreground GUI, run :

open -a Simulator\n

Note

To shutdown the simulator, run :

xcrun simctl shutdown <UUID>\n
"},{"location":"emu_setup/#preparing-ios-keyboard","title":"Preparing iOS Keyboard","text":"

iOS Keyboard is the default keyboard on iOS, so there is nothing to do to enable it.

However, predictions and auto-corrections are disabled by default. They should be enabled :

  • Go to \"Settings\" :

  • Then go to \"General\" :

  • Then go to \"Keyboard\" :

  • Then enable \"Auto-Correction\" and \"Predictive Text\" :

Also, inline predictions are enabled by default, and it may interfere with kebbie. Make sure to turn it off (also in the keyboard settings) :

Layout

For now, the only layout supported is english US. Make sure this is the layout iOS keyboard is using.

"},{"location":"emu_setup/#preparing-fleksy-keyboard","title":"Preparing Fleksy keyboard","text":"

Fleksy is a fully-featured keyboard SDK. A demo keyboard is provided, allowing anyone to test its performance.

You first need to install the keyboard in your simulator. To do this, start your simulator (see Starting the emulator), and then run :

wget https://github.com/FleksySDK/kebbie/files/15290354/Fleksy.zip\nunzip Fleksy.zip\nxcrun simctl install <UUID> Fleksy.app\n

Tip

You can find the UUID of your simulator by running : xcrun simctl list and finding which one is Booted.

Once the app is installed, start it :

Click \"Add Keyboard\" :

Then go to \"General\" :

Then go to \"Keyboard\" :

Then go to \"Keyboards\" :

Then click \"Add New Keyboard\" :

And select \"Fleksy For Research\" :

Then select the Fleksy keyboard you just installed :

And enable \"Full Access\" :

Once enabled, you still need to select the right keyboard ! Open the keyboard using any text field, and hold the switch keyboard key. You can then select the keyboard you want to test :

And similarly to the default iOS keyboard, you should enable predictions and auto-corrections :

  • Go to \"Settings\" :

  • Then go to \"General\" :

  • Then go to \"Keyboard\" :

  • Then enable \"Auto-Correction\" and \"Predictive Text\" :

"},{"location":"emu_setup/#preparing-keyboardkit","title":"Preparing KeyboardKit","text":"

KeyboardKit is an open-source SDK that lets you create a custom keyboard. They provide a demo keyboard that we can use to test its performance.

Before being able to run kebbie evaluate to benchmark KeyboardKit, you need to install the demo keyboard on your simulator.

First, clone the repository and open the project in Xcode :

git clone https://github.com/KeyboardKit/KeyboardKit.git\ncd KeyboardKit\nopen Demo/Demo.xcodeproj\n

Then, from Xcode, select the Demo project, select the right simulator, and press the play button :

It should start the simulator, with KeyboardKit installed.

Once the simulator started, you need to enable the KeyboardKit keyboard and allow full access :

Once enabled, you still need to select the right keyboard ! Open the keyboard using any text field, and hold the switch keyboard key. You can then select the keyboard you want to test :

And similarly to the default iOS keyboard, you should enable predictions and auto-corrections :

  • Go to \"Settings\" :

  • Then go to \"General\" :

  • Then go to \"Keyboard\" :

  • Then enable \"Auto-Correction\" and \"Predictive Text\" :

"},{"location":"emu_setup/#parallel-emulators","title":"Parallel emulators","text":"

In order to run tests faster, we can setup multiple emulators, and run the evaluate() function in parallel. Let's see how to set up multiple emulators for both Android and iOS.

"},{"location":"emu_setup/#android","title":"Android","text":"

First, follow the section above to setup one Android emulator.

Once it's done, you can simply clone it from Android Studio :

Clone it several times. Once the emulators are created, you should be able to list them from the command line :

emulator -list-avds\n

Then open several terminal, and in each terminal open one emulator :

emulator -avd <name> -no-snapshot-load\n

After they started, you should be able to see them with :

adb devices\n

Tip

Once you can see the emulators with the adb devices command, there is nothing else to do ! You can run the kebbie CLI just like you would do for a single emulator : the CLI will detect the running emulators with the adb devices command.

"},{"location":"emu_setup/#ios","title":"iOS","text":"

First, follow the section above to setup one iOS simulator and make sure everything works for a single device.

Once it's done, you can list the device availables :

xcrun simctl list\n

Example of emulators listed :

-- iOS 17.4 --\n    iPhone SE (3rd generation) (96ADAD77-ECE6-420E-B56C-505E0C16231B) (Shutdown)\n    iPhone 15 (128F95FC-F499-4B09-A3B2-55937BF52B0B) (Shutdown)\n    iPhone 15 Plus (86591FC6-B3E7-43A2-9E9B-D4A2A90DAF31) (Shutdown)\n    iPhone 15 Pro (9D38F87D-273B-4D8F-8AD5-E901C1974C1E) (Shutdown)\n    iPhone 15 Pro Max (15EF57B4-69E6-4369-9534-70692A2023E5) (Shutdown)\n    iPad Air (5th generation) (252D522B-CEAA-4085-BE17-A453BC219755) (Shutdown)\n    iPad (10th generation) (39F2ADD2-2FCF-44C3-9DC9-4CC4D50875E9) (Shutdown)\n    iPad mini (6th generation) (59125B84-4ED1-40C1-8457-3CE824394385) (Shutdown)\n    iPad Pro (11-inch) (4th generation) (DB122D71-F358-48DA-B11C-D25305657E7F) (Shutdown)\n    iPad Pro (12.9-inch) (6th generation) (1100927A-B631-4678-AB19-02EA4F680537) (Shutdown)\n

Select the UUID of the device you would like to run in parallel, and clone it with :

xcrun simctl clone <UUID> <new_name>\n

So for example, to have 4 parallel iPhone 15 Pro, you should run :

xcrun simctl clone 9D38F87D-273B-4D8F-8AD5-E901C1974C1E iPhone_15_2\nxcrun simctl clone 9D38F87D-273B-4D8F-8AD5-E901C1974C1E iPhone_15_3\nxcrun simctl clone 9D38F87D-273B-4D8F-8AD5-E901C1974C1E iPhone_15_4\n

Once this is done, you should see them listed when running :

xcrun simctl list\n
-- iOS 17.4 --\n    iPhone SE (3rd generation) (96ADAD77-ECE6-420E-B56C-505E0C16231B) (Shutdown)\n    iPhone 15 (128F95FC-F499-4B09-A3B2-55937BF52B0B) (Shutdown)\n    iPhone 15 Plus (86591FC6-B3E7-43A2-9E9B-D4A2A90DAF31) (Shutdown)\n    iPhone 15 Pro (9D38F87D-273B-4D8F-8AD5-E901C1974C1E) (Booted)\n    iPhone_15_2 (C423F3BC-BC3A-4FFC-B264-C6075B60115F) (Shutdown)\n    iPhone_15_3 (2BEB33D0-8F33-4987-95FC-FD9B7C2BD54D) (Shutdown)\n    iPhone_15_4 (EE0719E9-FF3C-4539-9BCD-9F091B469F93) (Shutdown)\n    iPhone 15 Pro Max (15EF57B4-69E6-4369-9534-70692A2023E5) (Shutdown)\n    iPad Air (5th generation) (252D522B-CEAA-4085-BE17-A453BC219755) (Shutdown)\n    iPad (10th generation) (39F2ADD2-2FCF-44C3-9DC9-4CC4D50875E9) (Shutdown)\n    iPad mini (6th generation) (59125B84-4ED1-40C1-8457-3CE824394385) (Shutdown)\n    iPad Pro (11-inch) (4th generation) (DB122D71-F358-48DA-B11C-D25305657E7F) (Shutdown)\n    iPad Pro (12.9-inch) (6th generation) (1100927A-B631-4678-AB19-02EA4F680537) (Shutdown)\n

Then you can start each simulator with :

xcrun simctl boot <UUID>\n

For example, to start the 4 simulators we just created, you would run :

xcrun simctl boot 9D38F87D-273B-4D8F-8AD5-E901C1974C1E\nxcrun simctl boot C423F3BC-BC3A-4FFC-B264-C6075B60115F\nxcrun simctl boot 2BEB33D0-8F33-4987-95FC-FD9B7C2BD54D\nxcrun simctl boot EE0719E9-FF3C-4539-9BCD-9F091B469F93\n

Tip

Once the simulators started, there is nothing else to do ! You can run the kebbie CLI just like you would do for a single emulator : the CLI will automatically detect the running emulators with the xcrun simctl list command.

However, make sure to enable auto-correction and predictive suggestions in each of the simulator (see Preparing the iOS Keyboard for more information)

Warning

The xcrun simctl boot command only launches the simulator background service; to launch the foreground GUI, run :

open -a Simulator\n

Note

To shut down a simulator, run :

xcrun simctl shutdown <UUID>\n
"},{"location":"emulated_keyboard/","title":"Emulated keyboards","text":"

In Usage, we saw how to use the kebbie framework to test our code and get various metrics to understand how good our custom auto-correction was.

Now, let's see how to use the kebbie CLI to run similar tests on an existing keyboard (within an emulator) such as GBoard.

"},{"location":"emulated_keyboard/#setup","title":"Setup","text":"

First, you need to install and setup Appium and the emulators.

Follow the instructions in Emulator setup.

Once everything you need is installed, you should have the following running :

  • Appium in a terminal
  • At least one emulator
"},{"location":"emulated_keyboard/#layout-detection","title":"Layout detection","text":"

kebbie tries to automatically detect the layout of the keyboard in use. It is working for GBoard or iOS keyboard for example.

But some keyboards cannot be detected automatically. In this case we rely on a manual definition of the layout.

But these manual definitions of the layout may not fit all devices.

"},{"location":"emulated_keyboard/#showing-the-layout","title":"Showing the layout","text":"

kebbie provides a CLI to check the layout. To visualize the keyboard's layout, run the show_layout command. For example for GBoard :

kebbie show_layout -K gboard\n

It will display 3 images (one for each layer of the keyboard : lowercase, uppercase, numbers), so you can see if the layout (automatically detected or manually defined) fits the current keyboard. You can leave the images by pressing any key.

Info

Before leaving, the command will also display in the terminal the detected suggestions of the keyboard. If they don't correspond to what's displayed in the emulator, something might be wrong !

For auto-detected keyboards, these suggestions are retrieved directly from the XML tree (fast and accurate). For keyboards with manual layout, we use OCR to find the suggestions (slow and may be wrong).

Tip

If you have several emulators running, the show_layout command will find and display the layout for each emulator, one by one.

Example where the layout match the keys properly :

Example where the layout doesn't match the keyboard's keys :

If it doesn't match...

You need to modify the definition of the layout (in emulator.py), and experiment with new coordinates until it matches well...

"},{"location":"emulated_keyboard/#list-of-supported-keyboards","title":"List of supported keyboards","text":"

Here is the list of keyboards for which the layout auto-detection is supported :

  • GBoard, with the -K gboard argument
  • iOS keyboard, with the -K ios argument
  • KeyboardKit Pro, with the -K kbkitpro argument
  • KeyboardKit Open-source, with the -K kbkitoss argument
  • Tappa keyboard, with the -K tappa argument
"},{"location":"emulated_keyboard/#testing-the-keyboard","title":"Testing the keyboard","text":"

After you made sure the layout is properly detected / defined, it's time to run the tests !

Simply run :

# For GBoard on Android emulator\nkebbie evaluate -K gboard --all_tasks\n\n# For iOS keyboard on iOS emulator\nkebbie evaluate -K ios --all_tasks\n

After a while, you should see the emulator start typing sentences !

The command line will type the sentences from the test data, and record the suggestions and the auto-corrections from the keyboard.

Once all sentences are tested, the results will be saved in a file results.json.

Info

The evaluate CLI will use only 100 sentences of the test data (versus 2 000 by default for the evaluate() function, see Usage).

This is because typing on an emulated keyboard is significantly slower. 100 sentences is enough to get some good, comparable metrics.

Note that we specified the option --all_tasks. With this option, we are computing the results for all of the tasks supported by the emulator : auto-correction, auto-completion, and next-word prediction.

Unsupported

For now, swipe gesture recognition is not supported for the emulated keyboards.

The default behavior (when --all_tasks is not specified) is to run only the auto-correction task. It is significantly faster, especially for keyboards with a layout defined manually, because they require OCR, which is quite slow.

If you want to change the number of sentences the CLI run on, just use the option --n_sentences :

kebbie evaluate -K gboard --all_tasks --n_sentences 10\n

You can change the destination file for the results with the option --result_file :

kebbie evaluate -K gboard --all_tasks --result_file my/folder/evaluation_results.json\n

You can track the most common mistakes with the option --track_mistakes :

kebbie evaluate -K gboard --all_tasks --track_mistakes\n

It will save the most common mistakes in the result file.

"},{"location":"how_testing_is_done/","title":"How testing is done ?","text":"

The basic idea is simple : we take a dataset of english sentences, we corrupt these sentences by introducing artificially generated typos, and then we measure how these typos are corrected.

"},{"location":"how_testing_is_done/#artificial-typos","title":"Artificial typos","text":"

To introduce typos in the clean text, we simulate all possible typos that a human typing on a mobile keyboard could do. This includes :

  • Characters additions / deletions
  • Characters transpositions
  • Accent simplifications
  • Case simplifications
  • Fat-finger syndrome (fuzzy typing)
  • Common typos (sampled from a dataset of most common typos)

We use the following typo rates :

  • Character transpositions : 1% of all characters
  • Character additions : 0.5% of all characters
  • Character deletions : 0.5% of all characters
  • Space deletions : 1% of all space characters
  • Symbol deletions : 10% of symbol characters
  • Accent simplification : 8% of accented characters
  • Case simplification : 8% of uppercased characters
  • Common typos : 5% of words

With these rates, we obtain an overall typo rate of 12%.

Sources

These rates come from studies on real-human typing habits : Reference #1, Reference #2.

Particularly, Reference #1 (which focus on mobile device typing) shows that typing on mobile devices leads to 2.3% of uncorrected errors (see introduction), and 8% of words autocorrected (see Intelligent text entry, page 8), for an overall typo rate of 10.3%.

Details

Additionally to these typo rates, we further modify the probabilities :

  • FRONT_DELETION_MULTIPLIER is used to reduce the probability of a deletion happening on the first character of the word. This number was computed after analyzing the Twitter typo corpus (see this script)

Here are a few examples of sentences before and after introducing typos :

Clean sentence Corrupted sentence Typos introduced He went hiking and said he'd think about it; never came back. He went hikimg and said hed think about it; never came back. Fuzzy typing & Symbol deletion Like, what you're doing here and what all this stuff is. Like, what you're doinghere and waht all this stuff is. Space deletion & Character transposition You must do something about yourself. You must do something about yourself. That's the way to get rid of pests like that. That's the waj to get rid of pedts like thhat. Common typo & Fuzzy typing & Character addition He obviously wanted an ally. he obviously wanted an ally. Case simplification This is all we got between us and the Almighty! This is lal we got beween us and the Almgihty! 2 x Character transposition & Character deletion"},{"location":"how_testing_is_done/#swipe-gesture-generation","title":"Swipe gesture generation","text":"

For the task of swipe gesture resolution, the input is not simple text : we need to generate a swipe gesture.

When generating fuzzy typing typo, we sample key taps positions on the keyboard, using Gaussian distributions, and use these key taps position to see if the correct character was typed, or if a neighbor key was typed.

For generating the swipe gesture, we sample some key taps positions just like we do for fuzzy typing, and then link the different keystrokes of the word using bezier curves. Some randomness on the speed & acceleration between points is added, in order to generate more natural swipe gestures.

Here are some examples of the generated swipe gestures (in red are the keystrokes generated by the fuzzy typing, in blue the points of the corresponding swipe gesture created).

For the word gives :

For the word they :

"},{"location":"how_testing_is_done/#data","title":"Data","text":""},{"location":"how_testing_is_done/#test-data","title":"Test data","text":"

For the data, we use the test set of the SODA dataset.

We chose to use this dataset for the evaluation for several reasons :

  • Recent
  • Extremely clean dataset
  • Cover two very distinct domains (narrative & dialogue)
"},{"location":"how_testing_is_done/#common-typos-dataset","title":"Common typos dataset","text":"

As mentioned in the section Artificial typos, we rely on a dataset of common typos, and use these common typos when generating plausible typos.

The dataset of common typos that we use is the Twitter Typo Corpus.

"},{"location":"how_testing_is_done/#tasks","title":"Tasks","text":"

We test the most important NLP features of a mobile keyboard. These are :

  • Auto-correction: Corrects the words typed by the user. For example, if a user types I\u2019m especialy touched, the typo should be detected and corrected to I\u2019m especially touched.
  • Auto-completion: Completes the word typed by the user. For example, if a user types I love y, the word should be auto-completed to I love you.
  • Next-word prediction: Predicts the next word to be typed. For example, if a user types I want to eat french, a probable next word can be fries.
  • Swipe gesture resolution: Predicts the intended word from a swipe gesture.
"},{"location":"how_testing_is_done/#metrics","title":"Metrics","text":"

If you look into the results from kebbie, for each task we have a handful of metrics that help us understand how good the tested keyboard is. Let's look at the details of these metrics.

"},{"location":"how_testing_is_done/#formulas","title":"Formulas","text":""},{"location":"how_testing_is_done/#next-word-prediction-swipe-resolution-auto-completion","title":"Next-word prediction, swipe resolution, auto-completion","text":"

For these three tasks, the metric used is Accuracy.

The formula is : accuracy = correct / total

Where correct is the number of correct predictions, and total the total number of predictions.

For the next-word prediction task and auto-completion task, we use top-3 accuracy as the main reference metric. It\u2019s the same as accuracy, but instead of considering only one candidate (which is either correct or not), we consider the 3 most probable candidates (if any one of these 3 candidates is correct).

The reason for this is because the next-word predictions and auto-completion predictions are not \u201cforced\u201d upon the user : 3 predictions are displayed at the top of the keyboard, and the user can choose any of the prediction displayed. So the correct prediction should appear among these 3 predictions displayed.

For swipe resolution however, only the best prediction is selected and applied. So we use accuracy as the main reference metric (and not top-3 accuracy).

"},{"location":"how_testing_is_done/#auto-correction","title":"Auto-correction","text":"

For auto-correction, it\u2019s different. We have a notion of true/false positive/negative. Let\u2019s first define these notions :

  • True Negative : No typo introduced, the model doesn\u2019t correct anything
  • False Positive : No typo introduced, but the model corrects (wrongly) the word
  • True Positive : A typo is introduced, the model corrects the word into the expected word
  • False Negative : A typo is introduced, but the model doesn\u2019t correct anything

With an example it\u2019s easier to visualize :

Word typed by the user Word after being corrected by the model Expected word True Negative love love love False Positive love loev love True Positive loev love love False Negative loev loev love

From these notions, we can compute the following metrics : accuracy, precision, recall, F-score, using the following formulas :

accuracy = (tp + tn) / (tp + tn + fp + fn)

precision = tp / (tp + fp)

recall = tp / (tp + fn)

f_score = 2 * (precision * recall) / (precision + recall)

Note

F-score is the harmonic mean of precision and recall. It\u2019s a way to gather both precision and recall in a single metric.

Important

Actually we use F\u03b2-score, which is a variant of the F-score where we can use a constant \u03b2 to weight the precision/recall ratio (see the wikipedia page about F-score).

This is useful because we value precision more.

We currently use \u03b2 = 0.9, which means precision has slightly more weight than recall.

"},{"location":"how_testing_is_done/#understanding-the-metrics","title":"Understanding the metrics","text":""},{"location":"how_testing_is_done/#swipe-resolution","title":"Swipe resolution","text":"

Accuracy - [0 - 1] - higher is better

Accuracy is straightforward : this is the ratio of correct predictions.

So an accuracy of 0.8 means the model correctly predicted the word being swiped 80% of the time.

"},{"location":"how_testing_is_done/#next-word-prediction-auto-completion","title":"Next-word prediction & auto-completion","text":"

Top-3 accuracy - [0 - 1] - higher is better

Same as accuracy, but 3 candidates are considered.

So a top-3 accuracy of 0.6 means that within the 3 candidates predicted by the model, the next word (or the word completion) is in these 3 candidates 60% of the time.

"},{"location":"how_testing_is_done/#auto-correction_1","title":"Auto-correction","text":"

Precision - [0 - 1] - higher is better

Precision is the ratio of typos among what is corrected by the model.

So a precision of 0.7 means that among all corrections made by the model, 70% were actually typos (and 30% were correct words that didn\u2019t need to be corrected).

A low precision means many words are corrected when they should not, and a high precision means only actual typos are corrected.

Recall - [0 - 1] - higher is better

Recall is the ratio of typos detected by the model.

So a recall of 0.65 means that the model correctly detected 65% of typos (and 35% of typos were not corrected by the model).

A low recall is a symptom that most typos are not detected, and a high recall means most typos are detected as typos.

F-score - [0 - 1] - higher is better

F-score is the harmonic mean of precision and recall, it\u2019s just a way to gather both precision and recall in a single metric.

Note that we weight precision slightly more than recall.

"},{"location":"internals/","title":"Internals","text":""},{"location":"internals/#cmdpy","title":"cmd.py","text":"

Module containing the implementation for the kebbie command line.

"},{"location":"internals/#kebbie.cmd.instantiate_correctors","title":"instantiate_correctors(keyboard, fast_mode=True, instantiate_emulator=True)","text":"

Create the right correctors (with the right platform, etc...) given the arguments from the command line.

Parameters:

Name Type Description Default keyboard str

Name of the keyboard to load.

required fast_mode bool

If True, the corrector will be instantiated in fast mode (only AC).

True instantiate_emulator bool

If True, the emulators are instantiated (which trigger the layout detection). If False, only the corrector is instantiated, not the emulator.

True

Returns:

Type Description List[EmulatorCorrector]

The list of created Correctors.

Source code in kebbie/cmd.py
def instantiate_correctors(\n    keyboard: str, fast_mode: bool = True, instantiate_emulator: bool = True\n) -> List[EmulatorCorrector]:\n    \"\"\"Create the right correctors (with the right platform, etc...) given the\n    arguments from the command line.\n\n    Args:\n        keyboard (str): Name fo the keyboard to load.\n        fast_mode (bool, optional): If `True`, the corrector will be\n            instantiated in fast mode (only AC).\n        instantiate_emulator (bool, optional): If `True`, the emulators are\n            instantiated (which trigger the layout detection). If `False`, only\n            the corrector is instantiated, not the emulator.\n\n    Returns:\n        The list of created Correctors.\n    \"\"\"\n    if keyboard in [\"gboard\", \"tappa\", \"swiftkey\"]:\n        # Android keyboards\n        return [\n            EmulatorCorrector(\n                device=d,\n                platform=\"android\",\n                keyboard=keyboard,\n                fast_mode=fast_mode,\n                instantiate_emulator=instantiate_emulator,\n            )\n            for d in Emulator.get_android_devices()\n        ]\n    else:\n        # iOS keyboards\n        return [\n            EmulatorCorrector(\n                device=i,\n                platform=\"ios\",\n                keyboard=keyboard,\n                fast_mode=fast_mode,\n                instantiate_emulator=instantiate_emulator,\n                ios_name=ios_name,\n                ios_platform=ios_platform,\n            )\n            for i, (ios_platform, ios_name) in enumerate(Emulator.get_ios_devices())\n        ]\n
"},{"location":"internals/#kebbie.cmd.common_args","title":"common_args(parser)","text":"

Add common arguments to the given parser.

Parameters:

Name Type Description Default parser ArgumentParser

Parser where to add the arguments.

required Source code in kebbie/cmd.py
def common_args(parser: argparse.ArgumentParser):\n    \"\"\"Add common arguments to the given parser.\n\n    Args:\n        parser (argparse.ArgumentParser): Parser where to add the arguments.\n    \"\"\"\n    parser.add_argument(\n        \"--keyboard\",\n        \"-K\",\n        dest=\"keyboard\",\n        type=str,\n        required=True,\n        choices=[\"gboard\", \"ios\", \"kbkitpro\", \"kbkitoss\", \"tappa\", \"fleksy\", \"swiftkey\"],\n        help=\"Which keyboard, to be tested, is currently installed on the emulator.\",\n    )\n
"},{"location":"internals/#kebbie.cmd.cli","title":"cli()","text":"

Entry-point of the kebbie command line.

Source code in kebbie/cmd.py
def cli():\n    \"\"\"Entry-point of the `kebbie` command line.\"\"\"\n    # create the top-level parser\n    parser = argparse.ArgumentParser(description=\"Kebbie's command line.\")\n    subparsers = parser.add_subparsers(title=\"commands\", dest=\"cmd\")\n\n    evaluate_parser = subparsers.add_parser(\"evaluate\", help=\"Run the evaluation using emulated keyboard.\")\n    evaluate_parser.set_defaults(cmd=\"evaluate\")\n    common_args(evaluate_parser)\n    evaluate_parser.add_argument(\n        \"--result_file\",\n        \"-R\",\n        dest=\"result_file\",\n        type=str,\n        default=\"results.json\",\n        help=\"When to save the results of the evaluation\",\n    )\n    evaluate_parser.add_argument(\n        \"--all_tasks\",\n        \"-A\",\n        dest=\"all_tasks\",\n        action=\"store_true\",\n        default=False,\n        help=\"If specified, all tasks are evaluated (not only auto-correction, but also auto-completion and \"\n        \"next-word prediction).\",\n    )\n    evaluate_parser.add_argument(\n        \"--n_sentences\",\n        \"-N\",\n        dest=\"n_sentences\",\n        type=int,\n        default=100,\n        help=\"The number of sentences to use for the evaluation. Emulated keyboard are slow, so we can't run on the \"\n        \"full test set. 
Instead we pick the first N sentences.\",\n    )\n    evaluate_parser.add_argument(\n        \"--track_mistakes\",\n        \"-T\",\n        dest=\"track_mistakes\",\n        action=\"store_true\",\n        default=False,\n        help=\"If specified, mistakes will be tracked and saved in the result file.\",\n    )\n\n    layout_parser = subparsers.add_parser(\n        \"show_layout\", help=\"Display the layout over the keyboard for debugging purpose.\"\n    )\n    layout_parser.set_defaults(cmd=\"show_layout\")\n    common_args(layout_parser)\n\n    args = parser.parse_args()\n\n    if args.cmd is None:\n        parser.print_help(sys.stderr)\n        sys.exit(1)\n    elif args.cmd == \"evaluate\":\n        correctors = instantiate_correctors(args.keyboard, fast_mode=not args.all_tasks, instantiate_emulator=False)\n\n        # Get dataset, and filter it to keep only a small number of sentences\n        dataset = get_soda_dataset(args.n_sentences)\n\n        # Run the evaluation\n        results = evaluate(correctors, dataset=dataset, track_mistakes=args.track_mistakes)\n\n        # Save the results in a file\n        with open(args.result_file, \"w\", encoding=\"utf-8\") as f:\n            json.dump(results, f, ensure_ascii=False, indent=4)\n\n        print(\"Overall score : \", results[\"overall_score\"])\n\n    elif args.cmd == \"show_layout\":\n        correctors = instantiate_correctors(args.keyboard)\n        for c in correctors:\n            c.emulator.show_keyboards()\n            print(f\"Predictions : {c.emulator.get_predictions()}\")\n
"},{"location":"internals/#correctorspy","title":"correctors.py","text":"

Module containing the base Corrector class.

"},{"location":"internals/#kebbie.correctors.EmulatorCorrector","title":"EmulatorCorrector","text":"

Bases: Corrector

Corrector using an emulated keyboard.

Parameters:

Name Type Description Default platform str

Name of the platform used. android or ios.

required keyboard str

Name of the keyboard to test.

required device str

Device UDID to use for the emulator.

None fast_mode bool

If True, only auto-correction will be tested, and suggestions will not be retrieved. This is faster because we don't take screenshot and run the OCR.

True instantiate_emulator bool

If False, the emulator is not initialized (It will only be initialized after being pickled). This is useful to quickly create instances of this class, without going through the whole layout detection (which takes time) 2 times : at initialization and after being pickled.

True Source code in kebbie/correctors.py
class EmulatorCorrector(Corrector):\n    \"\"\"Corrector using an emulated keyboard.\n\n    Args:\n        platform (str): Name of the platform used. `android` or `ios`.\n        keyboard (str): Name of the keyboard to test.\n        device (str): Device UDID to use for the emulator.\n        fast_mode (bool): If `True`, only auto-correction will be tested,\n            and suggestions will not be retrieved. This is faster because\n            we don't take screenshot and run the OCR.\n        instantiate_emulator (bool): If `False`, the emulator is not\n            initialized (It will only be initialized after being pickled).\n            This is useful to quickly create instances of this class,\n            without going through the whole layout detection (which takes\n            time) 2 times : at initialization and after being pickled.\n    \"\"\"\n\n    def __init__(\n        self,\n        platform: str,\n        keyboard: str,\n        device: str = None,\n        fast_mode: bool = True,\n        ios_name: str = None,\n        ios_platform: str = None,\n        instantiate_emulator: bool = True,\n    ):\n        super().__init__()\n\n        self.platform = platform\n        self.keyboard = keyboard\n        self.device = device\n        self.fast_mode = fast_mode\n        self.ios_name = ios_name\n        self.ios_platform = ios_platform\n\n        self.emulator = None\n        if instantiate_emulator:\n            self.emulator = Emulator(\n                self.platform,\n                self.keyboard,\n                device=self.device,\n                ios_name=self.ios_name,\n                ios_platform=self.ios_platform,\n            )\n\n        # Typing on keyboard is slow. 
Because we go through several AC calls\n        # in one sentence, keep track of the previously typed context, so we\n        # can just type the remaining characters\n        self.previous_context = \"\"\n\n    def __reduce__(self) -> Tuple:\n        \"\"\"This method simply makes the object pickable.\n\n        Returns:\n            Tuple of callable and arguments.\n        \"\"\"\n        return (\n            self.__class__,\n            (self.platform, self.keyboard, self.device, self.fast_mode, self.ios_name, self.ios_platform),\n        )\n\n    def cached_type(self, context: str, word: str):\n        \"\"\"This class keeps track of the content of the context currently\n        typed in the emulator. This method uses this current context to\n        determine if we need to retype the sentence or not. Instead of\n        always erasing the content being typed, we can directly type the\n        remaining characters, which saves up time.\n\n        Args:\n            context (str): Context to paste.\n            word (str): Word to type.\n        \"\"\"\n        sentence = context + word\n        if sentence.startswith(self.previous_context):\n            # The sentence to type start similarly as the previous context\n            # Don't retype everything, just what we need\n            self.emulator.type_characters(sentence[len(self.previous_context) :])\n        else:\n            # The previous context is not right, erase everything and type it\n            self.emulator.paste(context)\n            self.emulator.type_characters(word)\n        self.previous_context = sentence\n\n    def auto_correct(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        word: str,\n    ) -> List[str]:\n        \"\"\"Implementation of `auto_correct` method for emulated keyboards.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence 
basically).\n            keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            word (str): Word being typed (corresponding to the keystrokes).\n\n        Returns:\n            The list of correction candidates.\n        \"\"\"\n        self.cached_type(context, word)\n        candidates = self.emulator.get_predictions() if not self.fast_mode else []\n\n        candidates = [c for c in candidates if c != \"\"]\n\n        # On keyboard, the leftmost candidate is the word being typed without\n        # any change. If the word doesn't have a typo, this first candidate\n        # should be kept as the auto-correction, but if the word has a typo,\n        # we should remove it from the candidates list (as it will be\n        # auto-corrected).\n        # In order to know if it will be auto-corrected or not, we have no\n        # choice but type a space and retrieve the current text to see if it\n        # was auto-corrected or not.\n        self.emulator.type_characters(\" \")\n        self.previous_context = self.emulator.get_text()\n        autocorrection = self.previous_context[len(context) :].strip()\n\n        if len(candidates) == 0:\n            candidates = [autocorrection]\n        elif candidates[0] != autocorrection:\n            candidates.pop(0)\n            if autocorrection not in candidates:\n                candidates.insert(0, autocorrection)\n\n        return candidates\n\n    def auto_complete(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        partial_word: str,\n    ) -> List[str]:\n        \"\"\"Implementation of `auto_complete` method for emulated keyboards.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            keystrokes (List[Optional[Tuple[float, 
float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            partial_word (str): Partial word being typed (corresponding to the\n                keystrokes).\n\n        Returns:\n            The list of completion candidates.\n        \"\"\"\n        if self.fast_mode:\n            return []\n\n        self.cached_type(context, partial_word)\n        candidates = self.emulator.get_predictions()\n\n        candidates = [c for c in candidates if c != \"\"]\n\n        return candidates\n\n    def predict_next_word(self, context: str) -> List[str]:\n        \"\"\"Implementation of `predict_next_word` method for emulated keyboards.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n\n        Returns:\n            The list of next-word candidates.\n        \"\"\"\n        if self.fast_mode:\n            return []\n\n        # In order to get the predictions, the space should be typed\n        assert context[-1] == \" \"\n        self.cached_type(context[:-1], \" \")\n        candidates = self.emulator.get_predictions()\n        candidates = [c for c in candidates if c != \"\"]\n\n        return candidates\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.__reduce__","title":"__reduce__()","text":"

This method simply makes the object pickable.

Returns:

Type Description Tuple

Tuple of callable and arguments.

Source code in kebbie/correctors.py
def __reduce__(self) -> Tuple:\n    \"\"\"This method simply makes the object pickable.\n\n    Returns:\n        Tuple of callable and arguments.\n    \"\"\"\n    return (\n        self.__class__,\n        (self.platform, self.keyboard, self.device, self.fast_mode, self.ios_name, self.ios_platform),\n    )\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.cached_type","title":"cached_type(context, word)","text":"

This class keeps track of the content of the context currently typed in the emulator. This method uses this current context to determine if we need to retype the sentence or not. Instead of always erasing the content being typed, we can directly type the remaining characters, which saves up time.

Parameters:

Name Type Description Default context str

Context to paste.

required word str

Word to type.

required Source code in kebbie/correctors.py
def cached_type(self, context: str, word: str):\n    \"\"\"This class keeps track of the content of the context currently\n    typed in the emulator. This method uses this current context to\n    determine if we need to retype the sentence or not. Instead of\n    always erasing the content being typed, we can directly type the\n    remaining characters, which saves up time.\n\n    Args:\n        context (str): Context to paste.\n        word (str): Word to type.\n    \"\"\"\n    sentence = context + word\n    if sentence.startswith(self.previous_context):\n        # The sentence to type start similarly as the previous context\n        # Don't retype everything, just what we need\n        self.emulator.type_characters(sentence[len(self.previous_context) :])\n    else:\n        # The previous context is not right, erase everything and type it\n        self.emulator.paste(context)\n        self.emulator.type_characters(word)\n    self.previous_context = sentence\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.auto_correct","title":"auto_correct(context, keystrokes, word)","text":"

Implementation of auto_correct method for emulated keyboards.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required word str

Word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of correction candidates.

Source code in kebbie/correctors.py
def auto_correct(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    word: str,\n) -> List[str]:\n    \"\"\"Implementation of `auto_correct` method for emulated keyboards.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        word (str): Word being typed (corresponding to the keystrokes).\n\n    Returns:\n        The list of correction candidates.\n    \"\"\"\n    self.cached_type(context, word)\n    candidates = self.emulator.get_predictions() if not self.fast_mode else []\n\n    candidates = [c for c in candidates if c != \"\"]\n\n    # On keyboard, the leftmost candidate is the word being typed without\n    # any change. If the word doesn't have a typo, this first candidate\n    # should be kept as the auto-correction, but if the word has a typo,\n    # we should remove it from the candidates list (as it will be\n    # auto-corrected).\n    # In order to know if it will be auto-corrected or not, we have no\n    # choice but type a space and retrieve the current text to see if it\n    # was auto-corrected or not.\n    self.emulator.type_characters(\" \")\n    self.previous_context = self.emulator.get_text()\n    autocorrection = self.previous_context[len(context) :].strip()\n\n    if len(candidates) == 0:\n        candidates = [autocorrection]\n    elif candidates[0] != autocorrection:\n        candidates.pop(0)\n        if autocorrection not in candidates:\n            candidates.insert(0, autocorrection)\n\n    return candidates\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.auto_complete","title":"auto_complete(context, keystrokes, partial_word)","text":"

Implementation of auto_complete method for emulated keyboards.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required partial_word str

Partial word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of completion candidates.

Source code in kebbie/correctors.py
def auto_complete(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    partial_word: str,\n) -> List[str]:\n    \"\"\"Implementation of `auto_complete` method for emulated keyboards.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        partial_word (str): Partial word being typed (corresponding to the\n            keystrokes).\n\n    Returns:\n        The list of completion candidates.\n    \"\"\"\n    if self.fast_mode:\n        return []\n\n    self.cached_type(context, partial_word)\n    candidates = self.emulator.get_predictions()\n\n    candidates = [c for c in candidates if c != \"\"]\n\n    return candidates\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.predict_next_word","title":"predict_next_word(context)","text":"

Implementation of predict_next_word method for emulated keyboards.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required

Returns:

Type Description List[str]

The list of next-word candidates.

Source code in kebbie/correctors.py
def predict_next_word(self, context: str) -> List[str]:\n    \"\"\"Implementation of `predict_next_word` method for emulated keyboards.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n\n    Returns:\n        The list of next-word candidates.\n    \"\"\"\n    if self.fast_mode:\n        return []\n\n    # In order to get the predictions, the space should be typed\n    assert context[-1] == \" \"\n    self.cached_type(context[:-1], \" \")\n    candidates = self.emulator.get_predictions()\n    candidates = [c for c in candidates if c != \"\"]\n\n    return candidates\n
"},{"location":"internals/#emulatorpy","title":"emulator.py","text":"

Module containing the code necessary to interact with the emulators, using Appium.

"},{"location":"internals/#kebbie.emulator.Emulator","title":"Emulator","text":"

Class used to interact with an emulator and type word on a given keyboard.

Parameters:

Name Type Description Default platform str

android or ios.

required keyboard str

The name of the keyboard installed on the emulator. This is needed because each keyboard has a different layout, and we need to know each key's position in order to type words.

required device str

Device UDID to use.

None host str

Appium server's address.

'127.0.0.1' port str

Appium server's port.

'4723'

Raises:

Type Description ValueError

Error raised if the given platform doesn't exist.

Source code in kebbie/emulator.py
class Emulator:\n    \"\"\"Class used to interact with an emulator and type word on a given keyboard.\n\n    Args:\n        platform (str): `android` or `ios`.\n        keyboard (str): The name of the keyboard installed on the emulator.\n            This is needed because each keyboard has a different layout, and we\n            need to know each key's position in order to type words.\n        device (str, optional): Device UDID to use.\n        host (str, optional): Appium server's address.\n        port (str, optional): Appium server's port.\n\n    Raises:\n        ValueError: Error raised if the given platform doesn't exist.\n    \"\"\"\n\n    def __init__(  # noqa: C901\n        self,\n        platform: str,\n        keyboard: str,\n        device: str = None,\n        host: str = \"127.0.0.1\",\n        port: str = \"4723\",\n        ios_name: str = None,\n        ios_platform: str = None,\n    ):\n        super().__init__()\n\n        self.platform = platform.lower()\n        if self.platform not in [ANDROID, IOS]:\n            raise ValueError(f\"Unknown platform : {self.platform}. 
Please specify `{ANDROID}` or `{IOS}`.\")\n\n        # Start appium\n        capabilities = ANDROID_CAPABILITIES if self.platform == ANDROID else IOS_CAPABILITIES\n        if self.platform == IOS:\n            capabilities[\"deviceName\"] = ios_name\n            capabilities[\"platformVersion\"] = ios_platform\n            capabilities[\"wdaLocalPort\"] = 8000 + (device if device is not None else 0)\n        if self.platform == ANDROID and device is not None:\n            capabilities[\"udid\"] = device\n        self.driver = webdriver.Remote(f\"{host}:{port}\", capabilities)\n        self.driver.implicitly_wait(20)\n\n        self.screen_size = self.driver.get_window_size()\n\n        self.keyboard = keyboard.lower()\n\n        # Access a typing field\n        self.typing_field = None\n        self._access_typing_field()\n\n        # Keep track of the keyboard behavior\n        # When the typing field is empty, the keyboard is uppercase by default\n        self.kb_is_upper = True\n        self.last_char_is_space = False\n        self.last_char_is_eos = False\n\n        # Set the keyboard as default\n        if self.platform == ANDROID:\n            self.select_keyboard(keyboard)\n\n        # Get the right layout\n        if self.keyboard == GBOARD:\n            self.detected = GboardLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == TAPPA:\n            self.detected = TappaLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == FLEKSY:\n            self.detected = FleksyLayoutDetector(self.driver)\n            self.layout = self.detected.layout\n        elif self.keyboard == IOS:\n            self.detected = IosLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == KBKITPRO:\n            self.detected = KbkitproLayoutDetector(self.driver, self._tap)\n            self.layout = 
self.detected.layout\n        elif self.keyboard == KBKITOSS:\n            self.detected = KbkitossLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == SWIFTKEY:\n            self.detected = SwiftkeyLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        else:\n            raise ValueError(\n                f\"Unknown keyboard : {self.keyboard}. Please specify `{GBOARD}`, `{TAPPA}`, `{FLEKSY}`, \"\n                f\"`{SWIFTKEY}`, `{KBKITPRO}`, `{KBKITOSS}` or `{IOS}`.\"\n            )\n\n        self.typing_field.clear()\n\n    def _access_typing_field(self):\n        \"\"\"Start the right application and access the typing field where we\n        will type our text.\n        \"\"\"\n        if self.platform == ANDROID:\n            subprocess.run(\n                [\"adb\", \"shell\", \"am\", \"start\", \"-a\", \"android.intent.action.VIEW\", \"-d\", BROWSER_PAD_URL],\n                stdout=subprocess.PIPE,\n            )\n            typing_field_loaded = False\n            while not typing_field_loaded:\n                typing_fields = self.driver.find_elements(By.CLASS_NAME, ANDROID_TYPING_FIELD_CLASS_NAME)\n                typing_field_loaded = len(typing_fields) == 2\n            self.typing_field = typing_fields[0]\n        else:\n            self.driver.find_element(By.CLASS_NAME, IOS_START_CHAT_CLASS_NAME).click()\n            self.typing_field = self.driver.find_element(By.ID, IOS_TYPING_FIELD_ID)\n        self.typing_field.click()\n        self.typing_field.clear()\n\n    def get_android_devices() -> List[str]:\n        \"\"\"Static method that uses the `adb devices` command to retrieve the\n        list of devices running.\n\n        Returns:\n            List of detected device UDID.\n        \"\"\"\n        result = subprocess.run([\"adb\", \"devices\"], stdout=subprocess.PIPE)\n        devices = result.stdout.decode().split(\"\\n\")\n        
devices = [d.split()[0] for d in devices if not (d.startswith(\"List of devices attached\") or len(d) == 0)]\n        return devices\n\n    def select_keyboard(self, keyboard):\n        \"\"\"Searches the IME of the desired keyboard and selects it, only for Android.\n\n        Args:\n            keyboard (str): Keyboard to search.\n        \"\"\"\n        if keyboard not in KEYBOARD_PACKAGE:\n            print(\n                f\"Warning ! {keyboard}'s IME isn't provided (in `KEYBOARD_PACKAGE`), can't automatically select the \"\n                \"keyboard.\"\n            )\n            return\n\n        ime_list = subprocess.check_output([\"adb\", \"shell\", \"ime\", \"list\", \"-s\"], universal_newlines=True)\n        ime_name = None\n        for ime in ime_list.strip().split(\"\\n\"):\n            if KEYBOARD_PACKAGE[keyboard] in ime:\n                ime_name = ime\n                break\n        if ime_name:\n            subprocess.run(\n                [\"adb\", \"shell\", \"settings\", \"put\", \"secure\", \"show_ime_with_hard_keyboard\", \"1\"],\n                stdout=subprocess.PIPE,\n            )\n            subprocess.run([\"adb\", \"shell\", \"ime\", \"enable\", ime_name], stdout=subprocess.PIPE)\n            subprocess.run([\"adb\", \"shell\", \"ime\", \"set\", ime_name], stdout=subprocess.PIPE)\n\n    def get_ios_devices() -> List[Tuple[str, str]]:\n        \"\"\"Static method that uses the `xcrun simctl` command to retrieve the\n        list of booted devices.\n\n        Returns:\n            List of booted device platform and device name.\n        \"\"\"\n        devices = []\n\n        result = subprocess.run([\"xcrun\", \"simctl\", \"list\", \"devices\"], stdout=subprocess.PIPE)\n        out = result.stdout.decode().split(\"\\n\")\n\n        curr_platform = \"\"\n        for line in out:\n            if line.startswith(\"== \") and line.endswith(\" ==\"):\n                continue\n            elif line.startswith(\"-- \") and line.endswith(\" 
--\"):\n                curr_platform = line[3:-3]\n            else:\n                m = re.match(r\"\\s+([^\\t]+)\\s+\\([A-Z0-9\\-]+\\)\\s+\\((Booted|Shutdown)\\)\", line)\n                if m:\n                    device_name = m.group(1)\n                    status = m.group(2)\n\n                    if status == \"Booted\" and curr_platform.startswith(\"iOS \"):\n                        devices.append((curr_platform[4:], device_name))\n\n        return devices\n\n    def _paste(self, text: str):\n        \"\"\"Paste the given text into the typing field, to quickly simulate\n        typing a context.\n\n        Args:\n            text (str): Text to paste.\n        \"\"\"\n        if text == \"\":\n            self.typing_field.clear()\n            self.kb_is_upper = True\n            self.last_char_is_space = False\n            self.last_char_is_eos = False\n        else:\n            # Note : on Android, pasting content in the field will erase the previous content\n            # (which is what we want). 
On iOS it will not, we need to do it \"manually\"\n            if self.platform == IOS:\n                self.typing_field.clear()\n            if self.keyboard == KBKITPRO or self.keyboard == KBKITOSS or self.keyboard == FLEKSY:\n                # In the case of KeyboardKit / Fleksy, after pasting the content, typing a space\n                # trigger a punctuation (because previous context may end with a space)\n                # To avoid this behavior, break the cycle by typing a backspace\n                self._tap(self.layout[\"lowercase\"][\"backspace\"])\n            self.typing_field.send_keys(text)\n            self.kb_is_upper = len(text) > 1 and self._is_eos(text[-2]) and text.endswith(\" \")\n            self.last_char_is_space = text.endswith(\" \")\n            self.last_char_is_eos = self._is_eos(text[-1])\n\n    def paste(self, text: str):\n        \"\"\"Paste the given text into the typing field, to quickly simulate\n        typing a context.\n\n        This method is just a wrapper around `_paste()`, making sure the typing\n        field is accessible. If for some reason it is not accessible, it tries\n        to access it and perform the action again.\n\n        Args:\n            text (str): Text to paste.\n        \"\"\"\n        try:\n            self._paste(text)\n        except StaleElementReferenceException:\n            self._access_typing_field()\n            self._paste(text)\n\n    def type_characters(self, characters: str):  # noqa: C901\n        \"\"\"Type the given sentence on the keyboard. 
For each character, it\n        finds the keys to press and send a tap on the keyboard.\n\n        Args:\n            characters (str): The sentence to type.\n        \"\"\"\n        for c in characters:\n            if c == \" \":\n                if self.last_char_is_space:\n                    # If the previous character was a space, don't retype a space\n                    # because it can be transformed into a `.`\n                    continue\n\n                if self.kb_is_upper:\n                    self._tap(self.layout[\"uppercase\"][\"spacebar\"])\n                else:\n                    self._tap(self.layout[\"lowercase\"][\"spacebar\"])\n\n                # Behavior of the keyboard : if the previous character typed was an EOS marker\n                # and a space is typed, the keyboard automatically switch to uppercase\n                if self.last_char_is_eos:\n                    self.kb_is_upper = True\n            elif c in self.layout[\"lowercase\"]:\n                # The character is a lowercase character\n                if self.kb_is_upper:\n                    # If the keyboard is in uppercase mode, change it to lowercase\n                    self._tap(self.layout[\"uppercase\"][\"shift\"])\n                    if self.keyboard == SWIFTKEY:\n                        # Swiftkey needs double tap, otherwise we are capslocking\n                        self._tap(self.layout[\"uppercase\"][\"shift\"])\n                self._tap(self.layout[\"lowercase\"][c])\n            elif c in self.layout[\"uppercase\"]:\n                # The character is an uppercase character\n                if not self.kb_is_upper:\n                    # Change the keyboard to uppercase\n                    self._tap(self.layout[\"lowercase\"][\"shift\"])\n                self._tap(self.layout[\"uppercase\"][c])\n                # After typing one character, the keyboard automatically come back to lowercase\n            elif c in self.layout[\"numbers\"]:\n             
   # The character is a number or a special character\n                # Access the number keyboard properly\n                if self.kb_is_upper:\n                    self._tap(self.layout[\"uppercase\"][\"numbers\"])\n                else:\n                    self._tap(self.layout[\"lowercase\"][\"numbers\"])\n                self._tap(self.layout[\"numbers\"][c])\n\n                if c != \"'\" or self.keyboard in [GBOARD, SWIFTKEY]:\n                    # For some reason, when `'` is typed, the keyboard automatically goes back\n                    # to lowercase, so no need to re-tap the button (unless the keyboard is GBoard / Swiftkey).\n                    # In all other cases, switch back to letters keyboard\n                    self._tap(self.layout[\"numbers\"][\"letters\"])\n            else:\n                # Can't type this character, ignore it\n                continue\n\n            # Behavior of the keyboard : if the previous character typed was an EOS marker\n            # and a space is typed, the keyboard automatically switch to uppercase\n            self.kb_is_upper = self.last_char_is_eos and c == \" \"\n\n            # Update infos about what we typed\n            self.last_char_is_eos = self._is_eos(c)\n            self.last_char_is_space = c == \" \"\n\n    def _is_eos(self, c: str) -> bool:\n        \"\"\"Check if the given character is an End-Of-Sentence marker. 
If an EOS\n        marker is typed followed by a space, the keyboard automatically switch\n        to uppercase letters (unless it's GBoard).\n\n        Args:\n            c (str): Character to check.\n\n        Returns:\n            True if the character is an EOS marker.\n        \"\"\"\n        if self.keyboard == GBOARD:\n            return False\n        else:\n            return c in [\".\", \"!\", \"?\"]\n\n    def _tap(self, frame: List[int], keyboard_frame: List[int] = None):\n        \"\"\"Tap on the screen at the position described by the given frame.\n\n        Args:\n            frame (List[int]): Frame describing the position where to tap. A\n                frame is : [start_pos_x, start_pos_y, width, height].\n            keyboard_frame (List[int]): If specified, the Keyboard frame to\n                use. If `None`, it will use `self.layout[\"keyboard_frame\"]`.\n        \"\"\"\n        x, y, w, h = frame\n        base_x, base_y, *_ = keyboard_frame if keyboard_frame else self.layout[\"keyboard_frame\"]\n\n        pos_x = base_x + x + int(w / 2)\n        pos_y = base_y + y + int(h / 2)\n\n        actions = ActionChains(self.driver)\n        actions.w3c_actions = ActionBuilder(self.driver, mouse=PointerInput(interaction.POINTER_TOUCH, \"touch\"))\n        actions.w3c_actions.pointer_action.move_to_location(pos_x, pos_y)\n        actions.w3c_actions.pointer_action.pointer_down()\n        actions.w3c_actions.pointer_action.pause(0.05)\n        actions.w3c_actions.pointer_action.release()\n        actions.perform()\n\n    def _take_screenshot(self):\n        \"\"\"Take a screenshot of the full screen.\n\n        Returns:\n            The image of the screen.\n        \"\"\"\n        screen_data = self.driver.get_screenshot_as_png()\n        screen = np.asarray(Image.open(io.BytesIO(screen_data)))\n        return cv2.resize(\n            screen, (self.screen_size[\"width\"], self.screen_size[\"height\"]), interpolation=cv2.INTER_AREA\n        )\n\n    
def get_predictions(self, lang: str = \"en\") -> List[str]:\n        \"\"\"Retrieve the predictions displayed by the keyboard.\n\n        Args:\n            lang (str): Language to use for the OCR.\n\n        Returns:\n            List of predictions from the keyboard.\n        \"\"\"\n        if hasattr(self, \"detected\"):\n            # Only keyboards that were auto-detected (using XML tree) have the\n            # attribute `detected`. If that's the case, it means we\n            # can retrieve the suggestions directly from the XML tree !\n            predictions = self.detected.get_suggestions()\n        else:\n            # Other keyboards still have to use (slow) OCR\n            time.sleep(PREDICTION_DELAY)\n            screen = self._take_screenshot()\n\n            kb_x, kb_y, kb_w, kb_h = self.layout[\"keyboard_frame\"]\n            screen = screen[kb_y : kb_y + kb_h, kb_x : kb_x + kb_w]\n\n            predictions = []\n            for x, y, w, h in self.layout[\"suggestions_frames\"]:\n                suggestion_area = screen[y : y + h, x : x + w]\n                ocr_results = pytesseract.image_to_string(suggestion_area, config=TESSERACT_CONFIG)\n                pred = ocr_results.strip().replace(\"\u201c\", \"\").replace('\"', \"\").replace(\"\\\\\", \"\")\n                predictions.append(pred)\n\n        return predictions\n\n    def _get_text(self) -> str:\n        \"\"\"Return the text currently contained in the typing field.\n\n        Returns:\n            Text of the typing field.\n        \"\"\"\n        return self.typing_field.text\n\n    def get_text(self) -> str:\n        \"\"\"Return the text currently contained in the typing field.\n\n        This method is just a wrapper around `_get_text()`, making sure the\n        typing field is accessible. 
If for some reason it is not accessible, it\n        tries to access it and perform the action again.\n\n        Returns:\n            Text of the typing field.\n        \"\"\"\n        try:\n            return self._get_text()\n        except StaleElementReferenceException:\n            self._access_typing_field()\n            return self._get_text()\n\n    def show_keyboards(self):\n        \"\"\"Take a screenshot and overlay the given layout, for debugging the\n        position of each keys.\n        \"\"\"\n        # Type a character, in order to have some suggestions\n        # Keyboard starts with uppercase letter by default (unless GBoard), and\n        # automatically go to lowercase after\n        if self.keyboard == GBOARD:\n            self._tap(self.layout[\"lowercase\"][\"a\"])\n        else:\n            self._tap(self.layout[\"uppercase\"][\"A\"])\n        screen_lower = self._take_screenshot()\n\n        self._tap(self.layout[\"lowercase\"][\"shift\"])\n        screen_upper = self._take_screenshot()\n\n        self._tap(self.layout[\"lowercase\"][\"numbers\"])\n        screen_numbers = self._take_screenshot()\n\n        for layout_name, screen in zip(\n            [\"lowercase\", \"uppercase\", \"numbers\"], [screen_lower, screen_upper, screen_numbers]\n        ):\n            self._set_area_box(screen, (0, 0), self.layout[\"keyboard_frame\"], \"keyboard frame\")\n            if \"suggestions_frames\" in self.layout:\n                for i, suggestion_frame in enumerate(self.layout[\"suggestions_frames\"]):\n                    self._set_area_box(screen, self.layout[\"keyboard_frame\"], suggestion_frame, f\"suggestion {i}\")\n            for key_name, key_frame in self.layout[layout_name].items():\n                self._set_area_box(screen, self.layout[\"keyboard_frame\"], key_frame, key_name)\n\n            cv2.imshow(layout_name, screen)\n\n        cv2.waitKey(0)\n        cv2.destroyAllWindows()\n\n    def _set_area_box(self, image, base_coords: 
Tuple[int], coords: Tuple[int], tag: str):\n        \"\"\"Add an area box on the given image (color is random).\n\n        Args:\n            image: Image where to add the box.\n            base_coords (Tuple[int]): Base coordinates from the full image.\n            coords (Tuple[int]): Coordinates of the element, as well as\n                dimensions.\n            tag (str): Tag for this box.\n        \"\"\"\n        base_x, base_y, *_ = base_coords\n        x, y, w, h = coords\n        x += base_x\n        y += base_y\n        # Generate color only until 200, to ensure it's dark enough\n        color = (random.randint(0, 200), random.randint(0, 200), random.randint(0, 200))\n        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)\n        cv2.putText(image, tag, (x, y + h + 17), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_android_devices","title":"get_android_devices()","text":"

Static method that uses the adb devices command to retrieve the list of devices running.

Returns:

Type Description List[str]

List of detected device UDID.

Source code in kebbie/emulator.py
def get_android_devices() -> List[str]:\n    \"\"\"Static method that uses the `adb devices` command to retrieve the\n    list of devices running.\n\n    Returns:\n        List of detected device UDID.\n    \"\"\"\n    result = subprocess.run([\"adb\", \"devices\"], stdout=subprocess.PIPE)\n    devices = result.stdout.decode().split(\"\\n\")\n    devices = [d.split()[0] for d in devices if not (d.startswith(\"List of devices attached\") or len(d) == 0)]\n    return devices\n
"},{"location":"internals/#kebbie.emulator.Emulator.select_keyboard","title":"select_keyboard(keyboard)","text":"

Searches the IME of the desired keyboard and selects it, only for Android.

Parameters:

Name Type Description Default keyboard str

Keyboard to search.

required Source code in kebbie/emulator.py
def select_keyboard(self, keyboard):\n    \"\"\"Searches the IME of the desired keyboard and selects it, only for Android.\n\n    Args:\n        keyboard (str): Keyboard to search.\n    \"\"\"\n    if keyboard not in KEYBOARD_PACKAGE:\n        print(\n            f\"Warning ! {keyboard}'s IME isn't provided (in `KEYBOARD_PACKAGE`), can't automatically select the \"\n            \"keyboard.\"\n        )\n        return\n\n    ime_list = subprocess.check_output([\"adb\", \"shell\", \"ime\", \"list\", \"-s\"], universal_newlines=True)\n    ime_name = None\n    for ime in ime_list.strip().split(\"\\n\"):\n        if KEYBOARD_PACKAGE[keyboard] in ime:\n            ime_name = ime\n            break\n    if ime_name:\n        subprocess.run(\n            [\"adb\", \"shell\", \"settings\", \"put\", \"secure\", \"show_ime_with_hard_keyboard\", \"1\"],\n            stdout=subprocess.PIPE,\n        )\n        subprocess.run([\"adb\", \"shell\", \"ime\", \"enable\", ime_name], stdout=subprocess.PIPE)\n        subprocess.run([\"adb\", \"shell\", \"ime\", \"set\", ime_name], stdout=subprocess.PIPE)\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_ios_devices","title":"get_ios_devices()","text":"

Static method that uses the xcrun simctl command to retrieve the list of booted devices.

Returns:

Type Description List[Tuple[str, str]]

List of booted device platform and device name.

Source code in kebbie/emulator.py
def get_ios_devices() -> List[Tuple[str, str]]:\n    \"\"\"Static method that uses the `xcrun simctl` command to retrieve the\n    list of booted devices.\n\n    Returns:\n        List of booted device platform and device name.\n    \"\"\"\n    devices = []\n\n    result = subprocess.run([\"xcrun\", \"simctl\", \"list\", \"devices\"], stdout=subprocess.PIPE)\n    out = result.stdout.decode().split(\"\\n\")\n\n    curr_platform = \"\"\n    for line in out:\n        if line.startswith(\"== \") and line.endswith(\" ==\"):\n            continue\n        elif line.startswith(\"-- \") and line.endswith(\" --\"):\n            curr_platform = line[3:-3]\n        else:\n            m = re.match(r\"\\s+([^\\t]+)\\s+\\([A-Z0-9\\-]+\\)\\s+\\((Booted|Shutdown)\\)\", line)\n            if m:\n                device_name = m.group(1)\n                status = m.group(2)\n\n                if status == \"Booted\" and curr_platform.startswith(\"iOS \"):\n                    devices.append((curr_platform[4:], device_name))\n\n    return devices\n
"},{"location":"internals/#kebbie.emulator.Emulator.paste","title":"paste(text)","text":"

Paste the given text into the typing field, to quickly simulate typing a context.

This method is just a wrapper around _paste(), making sure the typing field is accessible. If for some reason it is not accessible, it tries to access it and perform the action again.

Parameters:

Name Type Description Default text str

Text to paste.

required Source code in kebbie/emulator.py
def paste(self, text: str):\n    \"\"\"Paste the given text into the typing field, to quickly simulate\n    typing a context.\n\n    This method is just a wrapper around `_paste()`, making sure the typing\n    field is accessible. If for some reason it is not accessible, it tries\n    to access it and perform the action again.\n\n    Args:\n        text (str): Text to paste.\n    \"\"\"\n    try:\n        self._paste(text)\n    except StaleElementReferenceException:\n        self._access_typing_field()\n        self._paste(text)\n
"},{"location":"internals/#kebbie.emulator.Emulator.type_characters","title":"type_characters(characters)","text":"

Type the given sentence on the keyboard. For each character, it finds the keys to press and send a tap on the keyboard.

Parameters:

Name Type Description Default characters str

The sentence to type.

required Source code in kebbie/emulator.py
def type_characters(self, characters: str):  # noqa: C901\n    \"\"\"Type the given sentence on the keyboard. For each character, it\n    finds the keys to press and send a tap on the keyboard.\n\n    Args:\n        characters (str): The sentence to type.\n    \"\"\"\n    for c in characters:\n        if c == \" \":\n            if self.last_char_is_space:\n                # If the previous character was a space, don't retype a space\n                # because it can be transformed into a `.`\n                continue\n\n            if self.kb_is_upper:\n                self._tap(self.layout[\"uppercase\"][\"spacebar\"])\n            else:\n                self._tap(self.layout[\"lowercase\"][\"spacebar\"])\n\n            # Behavior of the keyboard : if the previous character typed was an EOS marker\n            # and a space is typed, the keyboard automatically switch to uppercase\n            if self.last_char_is_eos:\n                self.kb_is_upper = True\n        elif c in self.layout[\"lowercase\"]:\n            # The character is a lowercase character\n            if self.kb_is_upper:\n                # If the keyboard is in uppercase mode, change it to lowercase\n                self._tap(self.layout[\"uppercase\"][\"shift\"])\n                if self.keyboard == SWIFTKEY:\n                    # Swiftkey needs double tap, otherwise we are capslocking\n                    self._tap(self.layout[\"uppercase\"][\"shift\"])\n            self._tap(self.layout[\"lowercase\"][c])\n        elif c in self.layout[\"uppercase\"]:\n            # The character is an uppercase character\n            if not self.kb_is_upper:\n                # Change the keyboard to uppercase\n                self._tap(self.layout[\"lowercase\"][\"shift\"])\n            self._tap(self.layout[\"uppercase\"][c])\n            # After typing one character, the keyboard automatically come back to lowercase\n        elif c in self.layout[\"numbers\"]:\n            # The character is a number of 
a special character\n            # Access the number keyboard properly\n            if self.kb_is_upper:\n                self._tap(self.layout[\"uppercase\"][\"numbers\"])\n            else:\n                self._tap(self.layout[\"lowercase\"][\"numbers\"])\n            self._tap(self.layout[\"numbers\"][c])\n\n            if c != \"'\" or self.keyboard in [GBOARD, SWIFTKEY]:\n                # For some reason, when `'` is typed, the keyboard automatically goes back\n                # to lowercase, so no need to re-tap the button (unless the keyboard is GBoard / Swiftkey).\n                # In all other cases, switch back to letters keyboard\n                self._tap(self.layout[\"numbers\"][\"letters\"])\n        else:\n            # Can't type this character, ignore it\n            continue\n\n        # Behavior of the keyboard : if the previous character typed was an EOS marker\n        # and a space is typed, the keyboard automatically switch to uppercase\n        self.kb_is_upper = self.last_char_is_eos and c == \" \"\n\n        # Update infos about what we typed\n        self.last_char_is_eos = self._is_eos(c)\n        self.last_char_is_space = c == \" \"\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_predictions","title":"get_predictions(lang='en')","text":"

Retrieve the predictions displayed by the keyboard.

Parameters:

Name Type Description Default lang str

Language to use for the OCR.

'en'

Returns:

Type Description List[str]

List of predictions from the keyboard.

Source code in kebbie/emulator.py
def get_predictions(self, lang: str = \"en\") -> List[str]:\n    \"\"\"Retrieve the predictions displayed by the keyboard.\n\n    Args:\n        lang (str): Language to use for the OCR.\n\n    Returns:\n        List of predictions from the keyboard.\n    \"\"\"\n    if hasattr(self, \"detected\"):\n        # Only keyboards that were auto-detected (using XML tree) have the\n        # attribute `detected`. If that's the case, it means we\n        # can retrieve the suggestions directly from the XML tree !\n        predictions = self.detected.get_suggestions()\n    else:\n        # Other keyboards still have to use (slow) OCR\n        time.sleep(PREDICTION_DELAY)\n        screen = self._take_screenshot()\n\n        kb_x, kb_y, kb_w, kb_h = self.layout[\"keyboard_frame\"]\n        screen = screen[kb_y : kb_y + kb_h, kb_x : kb_x + kb_w]\n\n        predictions = []\n        for x, y, w, h in self.layout[\"suggestions_frames\"]:\n            suggestion_area = screen[y : y + h, x : x + w]\n            ocr_results = pytesseract.image_to_string(suggestion_area, config=TESSERACT_CONFIG)\n            pred = ocr_results.strip().replace(\"\u201c\", \"\").replace('\"', \"\").replace(\"\\\\\", \"\")\n            predictions.append(pred)\n\n    return predictions\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_text","title":"get_text()","text":"

Return the text currently contained in the typing field.

This method is just a wrapper around _get_text(), making sure the typing field is accessible. If for some reason it is not accessible, it tries to access it and perform the action again.

Returns:

Type Description str

Text of the typing field.

Source code in kebbie/emulator.py
def get_text(self) -> str:\n    \"\"\"Return the text currently contained in the typing field.\n\n    This method is just a wrapper around `_get_text()`, making sure the\n    typing field is accessible. If for some reason it is not accessible, it\n    tries to access it and perform the action again.\n\n    Returns:\n        Text of the typing field.\n    \"\"\"\n    try:\n        return self._get_text()\n    except StaleElementReferenceException:\n        self._access_typing_field()\n        return self._get_text()\n
"},{"location":"internals/#kebbie.emulator.Emulator.show_keyboards","title":"show_keyboards()","text":"

Take a screenshot and overlay the given layout, for debugging the position of each key.

Source code in kebbie/emulator.py
def show_keyboards(self):\n    \"\"\"Take a screenshot and overlay the given layout, for debugging the\n    position of each keys.\n    \"\"\"\n    # Type a character, in order to have some suggestions\n    # Keyboard starts with uppercase letter by default (unless GBoard), and\n    # automatically go to lowercase after\n    if self.keyboard == GBOARD:\n        self._tap(self.layout[\"lowercase\"][\"a\"])\n    else:\n        self._tap(self.layout[\"uppercase\"][\"A\"])\n    screen_lower = self._take_screenshot()\n\n    self._tap(self.layout[\"lowercase\"][\"shift\"])\n    screen_upper = self._take_screenshot()\n\n    self._tap(self.layout[\"lowercase\"][\"numbers\"])\n    screen_numbers = self._take_screenshot()\n\n    for layout_name, screen in zip(\n        [\"lowercase\", \"uppercase\", \"numbers\"], [screen_lower, screen_upper, screen_numbers]\n    ):\n        self._set_area_box(screen, (0, 0), self.layout[\"keyboard_frame\"], \"keyboard frame\")\n        if \"suggestions_frames\" in self.layout:\n            for i, suggestion_frame in enumerate(self.layout[\"suggestions_frames\"]):\n                self._set_area_box(screen, self.layout[\"keyboard_frame\"], suggestion_frame, f\"suggestion {i}\")\n        for key_name, key_frame in self.layout[layout_name].items():\n            self._set_area_box(screen, self.layout[\"keyboard_frame\"], key_frame, key_name)\n\n        cv2.imshow(layout_name, screen)\n\n    cv2.waitKey(0)\n    cv2.destroyAllWindows()\n
"},{"location":"internals/#kebbie.emulator.LayoutDetector","title":"LayoutDetector","text":"

Base class for auto-detection of the keyboard layout.

To auto-detect a new keyboard, create a new sub-class, and overwrite __init__() and get_suggestions(). Use the existing subclass for GBoard as reference.

Parameters:

Name Type Description Default driver Remote

The Appium driver, used to access elements on the emulator.

required tap_fn Callable

A callback used to tap at a specific position on the screen. See Emulator._tap().

required xpath_root str

XPath to the root element of the keyboard.

required xpath_keys str

XPath to detect the keys elements.

required Source code in kebbie/emulator.py
class LayoutDetector:\n    \"\"\"Base class for auto-detection of the keyboard layout.\n\n    To auto-detect a new keyboard, create a new sub-class, and overwite\n    `__init__()` and `get_suggestions()`. Use the existing subclass for GBoard\n    as reference.\n\n    Args:\n        driver (webdriver.Remote): The Appium driver, used to access elements\n            on the emulator.\n        tap_fn (Callable): A callback used to tap at specific position on the\n            screen. See `Emulator._tap()`.\n        xpath_root (str): XPath to the root element of the keyboard.\n        xpath_keys (str): XPath to detect the keys elements.\n    \"\"\"\n\n    def __init__(\n        self, driver: webdriver.Remote, tap_fn: Callable, xpath_root: str, xpath_keys: str, android: bool = True\n    ):\n        self.driver = driver\n        self.tap = tap_fn\n        self.xpath_root = xpath_root\n        self.xpath_keys = xpath_keys\n        self.android = android\n\n        layout = {}\n\n        # Get the root element of our keyboard\n        root = self.driver.find_element(By.XPATH, self.xpath_root)\n\n        # On empty field, the keyboard is on uppercase\n        # So first, retrieve the keyboard frame and uppercase characters\n        kb_frame, screen_layout = self._detect_keys(root, current_layout=\"uppercase\")\n        layout[\"keyboard_frame\"] = kb_frame\n        layout[\"uppercase\"] = screen_layout\n\n        # Then, after typing a letter, the keyboard goes to lowercase automatically\n        self.tap(layout[\"uppercase\"][\"A\"], layout[\"keyboard_frame\"])\n        _, screen_layout = self._detect_keys(root, keyboard_frame=layout[\"keyboard_frame\"], current_layout=\"lowercase\")\n        layout[\"lowercase\"] = screen_layout\n\n        # Finally, access the symbols keyboard and get characters positions\n        self.tap(layout[\"lowercase\"][\"numbers\"], layout[\"keyboard_frame\"])\n        _, screen_layout = self._detect_keys(root, 
keyboard_frame=layout[\"keyboard_frame\"], current_layout=\"numbers\")\n        layout[\"numbers\"] = screen_layout\n\n        # Reset out keyboard to the original layer\n        self.tap(layout[\"numbers\"][\"letters\"], layout[\"keyboard_frame\"])\n\n        # Fix the keys' offset compared to the keyboard frame\n        if self.android:\n            self.layout = self._apply_status_bar_offset(layout)\n        else:\n            self.layout = layout\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Note that it's slower to access the XML through methods like\n        `find_element()`, and it's faster to access the raw XML with\n        `self.driver.page_source` and parse it as text directly.\n\n        Raises:\n            NotImplementedError: Exception raised if this method is not\n                overwritten.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        raise NotImplementedError\n\n    def _detect_keys(\n        self, root: WebElement, current_layout: str, keyboard_frame: List[int] = None\n    ) -> Tuple[List[int], Dict]:\n        \"\"\"This method detects all keys currently on screen.\n\n        If no keyboard_frame is given, it will also detects the keyboard frame.\n\n        Args:\n            root (WebElement): Root element in the XML tree that represents the\n                keyboard (with all its keys).\n            current_layout (str): Name of the current layout.\n            keyboard_frame (List[int], optional): Optionally, the keyboard\n                frame (so we don't need to re-detect it everytime).\n\n        Returns:\n            Keyboard frame\n            Layout with all the keys detected on this screen.\n        \"\"\"\n        layout = {}\n        if keyboard_frame is None:\n            if self.android:\n                # Detect the keyboard frame\n                kb = root.find_element(By.ID, 
\"android:id/inputArea\")\n                keyboard_frame = self._get_frame(kb)\n            else:\n                keyboard_frame = self._get_frame(root)\n\n        for key_elem in root.find_elements(By.XPATH, self.xpath_keys):\n            label = self._get_label(key_elem, current_layout=current_layout)\n            if label is not None:\n                layout[label] = self._get_frame(key_elem)\n\n        # Then update the letters positions to be relative to the keyboard frame\n        for k in layout:\n            layout[k][0] -= keyboard_frame[0]\n            layout[k][1] -= keyboard_frame[1]\n\n        return keyboard_frame, layout\n\n    def _get_frame(self, element: WebElement) -> List[int]:\n        \"\"\"For layout detection, this method returns the bounds of the given\n        element.\n\n        Args:\n            element (WebElement): XML Element describing a key.\n\n        Returns:\n            Bounds of this key.\n        \"\"\"\n        if self.android:\n            m = re.match(r\"\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]\", element.get_attribute(\"bounds\"))\n            if m:\n                bounds = [int(g) for g in m.groups()]\n                return [bounds[0], bounds[1], bounds[2] - bounds[0], bounds[3] - bounds[1]]\n        else:\n            r = json.loads(element.get_attribute(\"rect\"))\n            return [r[\"x\"], r[\"y\"], r[\"width\"], r[\"height\"]]\n\n    def _get_label(self, element: WebElement, current_layout: str, is_suggestion: bool = False) -> str:\n        \"\"\"For layout detection, this method returns the content of the given\n        element.\n\n        This method returns `None` if it's a key we don't care about. 
This\n        method takes care of translating the content (the name used in the XML\n        tree is not the same as the one used in our layout).\n\n        Args:\n            element (WebElement): XML Element describing a key.\n            current_layout (str): Name of the current layout.\n            is_suggestion (bool, optional): If we are retrieving the content of\n                a suggestion, the content shouldn't be translated.\n\n        Returns:\n            Content of the key, or None if it's a key we should ignore.\n        \"\"\"\n        content = element.get_attribute(\"content-desc\") if self.android else element.get_attribute(\"name\")\n\n        if is_suggestion:\n            # If we are getting the content of the suggestion, return the content directly\n            return content\n\n        if content in CONTENT_TO_IGNORE:\n            return None\n        elif not self.android and content == \"more\":\n            if current_layout == \"uppercase\" or current_layout == \"lowercase\":\n                return \"numbers\"\n            else:\n                return \"letters\"\n        elif content in CONTENT_TO_RENAME:\n            return CONTENT_TO_RENAME[content]\n        else:\n            return content\n\n    def _get_status_bar_bounds(self) -> List[int]:\n        \"\"\"For layout detection, this method retrieve the bounds of the status\n        bar from the XML tree.\n\n        Returns:\n            Bounds of the status bar.\n        \"\"\"\n        sb = self.driver.find_element(By.ID, \"com.android.systemui:id/status_bar\")\n        return self._get_frame(sb)\n\n    def _apply_status_bar_offset(self, layout: Dict) -> Dict:\n        \"\"\"Method offsetting the given layout to match the screen.\n\n        On Android, somehow the detected positions for the keys aren't matching\n        what we see on screen. This is because of the status bar, which shift\n        everything. 
So, detect the status bar, and shift back the keys to the\n        right position.\n\n        Args:\n            layout (Dict): Layout to fix.\n\n        Returns:\n            Fixed layout.\n        \"\"\"\n        sb_bounds = self._get_status_bar_bounds()\n        dy = sb_bounds[3]\n        screen_size = layout[\"keyboard_frame\"][1] + layout[\"keyboard_frame\"][3]\n\n        # First of all, offset the keyboard frame\n        frame_dy1 = int(dy * (layout[\"keyboard_frame\"][1] / screen_size))\n        frame_dy2 = int(dy * ((layout[\"keyboard_frame\"][1] + layout[\"keyboard_frame\"][3]) / screen_size))\n        layout[\"keyboard_frame\"][1] -= frame_dy1\n        layout[\"keyboard_frame\"][3] -= frame_dy2 - frame_dy1\n\n        # Then do the same for each keys of each layouts\n        for layer in [\"lowercase\", \"uppercase\", \"numbers\"]:\n            for k in layout[layer]:\n                dy1 = int(dy * ((layout[\"keyboard_frame\"][1] + layout[layer][k][1]) / screen_size))\n                dy2 = int(\n                    dy * ((layout[\"keyboard_frame\"][1] + layout[layer][k][1] + layout[layer][k][3]) / screen_size)\n                )\n                layout[layer][k][1] -= dy1 - frame_dy1\n                layout[layer][k][3] -= dy2 - dy1\n\n        return layout\n
"},{"location":"internals/#kebbie.emulator.LayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Note that it's slower to access the XML through methods like find_element(), and it's faster to access the raw XML with self.driver.page_source and parse it as text directly.

Raises:

Type Description NotImplementedError

Exception raised if this method is not overwritten.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Note that it's slower to access the XML through methods like\n    `find_element()`, and it's faster to access the raw XML with\n    `self.driver.page_source` and parse it as text directly.\n\n    Raises:\n        NotImplementedError: Exception raised if this method is not\n            overwritten.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"internals/#kebbie.emulator.GboardLayoutDetector","title":"GboardLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Gboard keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class GboardLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Gboard keyboard. See `LayoutDetector` for more\n    information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=f\"./*/*[@package='{KEYBOARD_PACKAGE[GBOARD]}']\",\n            xpath_keys=\".//*[@resource-id][@content-desc]\",\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        sections = [\n            data\n            for data in self.driver.page_source.split(\"<android.widget.FrameLayout\")\n            if \"com.google.android.inputmethod\" in data\n        ]\n        for section in sections:\n            if \"content-desc\" in section and \"resource-id\" not in section and 'long-clickable=\"true\"' in section:\n                m = re.search(r\"content\\-desc=\\\"([^\\\"]*)\\\"\", section)\n                if m:\n                    content = m.group(1)\n\n                    # Deal with emojis\n                    emoji = re.match(r\"emoji (&[^;]+;)\", content)\n                    suggestions.append(html.unescape(emoji[1]) if emoji else content)\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.GboardLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    sections = [\n        data\n        for data in self.driver.page_source.split(\"<android.widget.FrameLayout\")\n        if \"com.google.android.inputmethod\" in data\n    ]\n    for section in sections:\n        if \"content-desc\" in section and \"resource-id\" not in section and 'long-clickable=\"true\"' in section:\n            m = re.search(r\"content\\-desc=\\\"([^\\\"]*)\\\"\", section)\n            if m:\n                content = m.group(1)\n\n                # Deal with emojis\n                emoji = re.match(r\"emoji (&[^;]+;)\", content)\n                suggestions.append(html.unescape(emoji[1]) if emoji else content)\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.IosLayoutDetector","title":"IosLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the iOS default keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class IosLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the iOS default keyboard. See `LayoutDetector` for\n    more information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=\".//XCUIElementTypeKeyboard\",\n            xpath_keys=\"(.//XCUIElementTypeKey|.//XCUIElementTypeButton)\",\n            android=False,\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        sections = [\n            data for data in self.driver.page_source.split(\"<XCUIElementTypeOther\") if \"name=\" in data.split(\">\")[0]\n        ]\n        is_typing_predictions_section = False\n        for section in sections:\n            m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", section)\n            if m:\n                name = m.group(1)\n\n                if name == \"Typing Predictions\":\n                    is_typing_predictions_section = True\n                    continue\n\n                if is_typing_predictions_section:\n                    suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.IosLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    sections = [\n        data for data in self.driver.page_source.split(\"<XCUIElementTypeOther\") if \"name=\" in data.split(\">\")[0]\n    ]\n    is_typing_predictions_section = False\n    for section in sections:\n        m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", section)\n        if m:\n            name = m.group(1)\n\n            if name == \"Typing Predictions\":\n                is_typing_predictions_section = True\n                continue\n\n            if is_typing_predictions_section:\n                suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitproLayoutDetector","title":"KbkitproLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the KeyboardKit Pro demo keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class KbkitproLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the KeyboardKit Pro demo keyboard. See\n    `LayoutDetector` for more information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=\".//XCUIElementTypeOther[XCUIElementTypeButton and XCUIElementTypeTextField]\",\n            xpath_keys=\".//XCUIElementTypeButton\",\n            android=False,\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n            if \"<XCUIElementTypeTextField\" in data:\n                pred_part = data.split(\"<XCUIElementTypeTextField\")[0]\n                if \"<XCUIElementTypeButton\" in pred_part and 'name=\"Add\"' in pred_part:\n                    for elem in pred_part.split(\">\")[2:]:\n                        if \"<XCUIElementTypeTextField\" in elem:\n                            break\n                        m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", elem)\n                        if m:\n                            name = m.group(1)\n                            suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitproLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n        if \"<XCUIElementTypeTextField\" in data:\n            pred_part = data.split(\"<XCUIElementTypeTextField\")[0]\n            if \"<XCUIElementTypeButton\" in pred_part and 'name=\"Add\"' in pred_part:\n                for elem in pred_part.split(\">\")[2:]:\n                    if \"<XCUIElementTypeTextField\" in elem:\n                        break\n                    m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", elem)\n                    if m:\n                        name = m.group(1)\n                        suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitossLayoutDetector","title":"KbkitossLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the KeyboardKit OSS demo keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class KbkitossLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the KeyboardKit OSS demo keyboard. See\n    `LayoutDetector` for more information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=\".//XCUIElementTypeOther[XCUIElementTypeButton and XCUIElementTypeStaticText]\",\n            xpath_keys=\".//XCUIElementTypeButton\",\n            android=False,\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n            if \", Subtitle\" in data:\n                pred_part = data.split(\", Subtitle\")[0]\n                for elem in pred_part.split(\">\")[1:]:\n                    m = re.search(r\"name=\\\"([^\\\"]*)\\\"?\", elem)\n                    if m:\n                        name = m.group(1)\n                        suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitossLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n        if \", Subtitle\" in data:\n            pred_part = data.split(\", Subtitle\")[0]\n            for elem in pred_part.split(\">\")[1:]:\n                m = re.search(r\"name=\\\"([^\\\"]*)\\\"?\", elem)\n                if m:\n                    name = m.group(1)\n                    suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.SwiftkeyLayoutDetector","title":"SwiftkeyLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Swiftkey keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class SwiftkeyLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Swiftkey keyboard. See `LayoutDetector` for more\n    information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=f\"./*/*[@package='{KEYBOARD_PACKAGE[SWIFTKEY]}']\",\n            xpath_keys=\".//*[@class='android.view.View'][@content-desc]\",\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        # Get the raw content as text, weed out useless elements\n        for data in self.driver.page_source.split(\"<android.widget.FrameLayout\"):\n            if \"com.touchtype.swiftkey\" in data and \"<android.view.View \" in data:\n                sections = data.split(\"<android.view.View \")\n                for section in sections[1:]:\n                    m = re.search(r\"content-desc=\\\"([^\\\"]*)\\\"\", section)\n                    if m:\n                        suggestions.append(html.unescape(m.group(1)))\n                break\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.SwiftkeyLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    # Get the raw content as text, weed out useless elements\n    for data in self.driver.page_source.split(\"<android.widget.FrameLayout\"):\n        if \"com.touchtype.swiftkey\" in data and \"<android.view.View \" in data:\n            sections = data.split(\"<android.view.View \")\n            for section in sections[1:]:\n                m = re.search(r\"content-desc=\\\"([^\\\"]*)\\\"\", section)\n                if m:\n                    suggestions.append(html.unescape(m.group(1)))\n            break\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.TappaLayoutDetector","title":"TappaLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Tappa keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class TappaLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Tappa keyboard. See `LayoutDetector` for more\n    information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=f\"./*/*[@package='{KEYBOARD_PACKAGE[TAPPA]}']\",\n            xpath_keys=\".//com.mocha.keyboard.inputmethod.keyboard.Key\",\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        # Get the raw content as text, weed out useless elements\n        section = self.driver.page_source.split(f\"{KEYBOARD_PACKAGE[TAPPA]}:id/toolbar\")[1].split(\n            \"</android.widget.FrameLayout>\"\n        )[0]\n\n        for line in section.split(\"\\n\"):\n            if \"<android.widget.TextView\" in line:\n                m = re.search(r\"text=\\\"([^\\\"]*)\\\"\", line)\n                if m:\n                    suggestions.append(html.unescape(m.group(1)))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.TappaLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    # Get the raw content as text, weed out useless elements\n    section = self.driver.page_source.split(f\"{KEYBOARD_PACKAGE[TAPPA]}:id/toolbar\")[1].split(\n        \"</android.widget.FrameLayout>\"\n    )[0]\n\n    for line in section.split(\"\\n\"):\n        if \"<android.widget.TextView\" in line:\n            m = re.search(r\"text=\\\"([^\\\"]*)\\\"\", line)\n            if m:\n                suggestions.append(html.unescape(m.group(1)))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.FleksyLayoutDetector","title":"FleksyLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Fleksy keyboard. See LayoutDetector for more information.

Note that this class is only semi-automatically detected : the layout itself is not detected, but the suggestions are retrieved from the XML tree (no need to rely on OCR, much faster). The layout is hard-coded for now.

Source code in kebbie/emulator.py
class FleksyLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Fleksy keyboard. See `LayoutDetector` for more\n    information.\n\n    Note that this class is only semi-automatically detected : the layout\n    itself is not detected, but the suggestions are retrieved from the XML tree\n    (no need to rely on OCR, much faster). The layout is hard-coded for now.\n    \"\"\"\n\n    def __init__(self, driver: webdriver.Remote):\n        self.driver = driver\n\n        # Adapt the layout to the screen\n        w = FLEKSY_LAYOUT[\"keyboard_frame\"][2]\n        h = FLEKSY_LAYOUT[\"keyboard_frame\"][3]\n        self.layout = {\"keyboard_frame\": FLEKSY_LAYOUT[\"keyboard_frame\"]}\n        for layout_name in [\"lowercase\", \"uppercase\", \"numbers\"]:\n            for key_name, key_frame in FLEKSY_LAYOUT[layout_name].items():\n                if layout_name not in self.layout:\n                    self.layout[layout_name] = {}\n                self.layout[layout_name][key_name] = [\n                    int(key_frame[0] * w),\n                    int(key_frame[1] * h),\n                    int(key_frame[2] * w),\n                    int(key_frame[3] * h),\n                ]\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        # Get the raw content as text, weed out useless elements\n        sections = [\n            s\n            for s in self.driver.page_source.split(\"XCUIElementTypeOther\")\n            if \"XCUIElementTypeStaticText\" in s and \"XCUIElementTypeButton\" not in s\n        ]\n\n        for s in sections:\n            m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", s)\n            if m:\n                suggestions.append(html.unescape(m.group(1)))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.FleksyLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    # Get the raw content as text, weed out useless elements\n    sections = [\n        s\n        for s in self.driver.page_source.split(\"XCUIElementTypeOther\")\n        if \"XCUIElementTypeStaticText\" in s and \"XCUIElementTypeButton\" not in s\n    ]\n\n    for s in sections:\n        m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", s)\n        if m:\n            suggestions.append(html.unescape(m.group(1)))\n\n    return suggestions\n
"},{"location":"internals/#gesturepy","title":"gesture.py","text":"

Module containing the function make_swipe_gesture, which is used to create a natural-looking swipe gesture from a list of letter-points.

"},{"location":"internals/#kebbie.gesture.make_swipe_gesture","title":"make_swipe_gesture(control_points)","text":"

Function to generate artificial swipe gesture from a list of points. The given points represents the typed letters on the keyboard. This function simply generate several other points between the control points. Points are generated using sequential Bezier curves. The resulting swipe gesture pass by the control points.

Parameters:

Name Type Description Default control_points List[Tuple[float, float]]

Control points, representing the letter typed. The resulting swipe gesture will pass by these points.

required

Returns:

Type Description List[Tuple[float, float]]

Points generated by the swipe gesture.

Source code in kebbie/gesture.py
def make_swipe_gesture(control_points: List[Tuple[float, float]]) -> List[Tuple[float, float]]:\n    \"\"\"Function to generate artificial swipe gesture from a list of points.\n    The given points represents the typed letters on the keyboard. This\n    function simply generate several other points between the control points.\n    Points are generated using sequential Bezier curves. The resulting swipe\n    gesture pass by the control points.\n\n    Args:\n        control_points (List[Tuple[float, float]]): Control points,\n            representing the letter typed. The resulting swipe gesture will\n            pass by these points.\n\n    Returns:\n        Points generated by the swipe gesture.\n    \"\"\"\n    gesture_points = [control_points[0]]\n\n    # Pick a \"style\" (speed & acceleration) and keep it constant across the gesture\n    speed = random.uniform(MIN_N_POINTS_PER_DIST, MAX_N_POINTS_PER_DIST)\n    acceleration = random.uniform(MIN_ACCELERATION, MAX_ACCELERATION)\n\n    # Generate bezier curves between each control points\n    for p1, p2 in zip(control_points[:-1], control_points[1:]):\n        # The distance between the 2 points will dictate the speed and radius\n        d = euclidian_dist(p1, p2)\n        radius = min(d, MAX_RADIUS)\n        n_points = max(1, int(d * speed))\n\n        linspace = accelerated_linspace(n_points, acceleration)\n\n        # We don't want the curves to be straight between the control points,\n        # so we generate random points to add curves\n        p1_curv = random_point_around(p1, radius=radius)\n        p2_curv = random_point_around(p2, radius=radius)\n\n        # Make the bezier curve with the specified number of points\n        xs, ys = bezier_curve([p2, p2_curv, p1_curv, p1], linspace=linspace)\n        bezier_points = list(zip(xs, ys))\n\n        # Make sure the control point p2 is here\n        if bezier_points[-1] != p2:\n            bezier_points.append(p2)\n        # p1 was already added in the previous 
loop, no need to add it\n        if bezier_points[0] == p1:\n            bezier_points = bezier_points[1:]\n\n        gesture_points.extend(bezier_points)\n\n    return gesture_points\n
"},{"location":"internals/#kebbie.gesture.random_point_around","title":"random_point_around(p, radius)","text":"

Generate a random point around the given point p, within the given radius.

Parameters:

Name Type Description Default p Tuple[float, float]

Coordinates to use as a starting point.

required radius float

Radius within the starting point to generate the random point.

required

Returns:

Type Description Tuple[float, float]

Coordinates of the generated random point.

Source code in kebbie/gesture.py
def random_point_around(p: Tuple[float, float], radius: float) -> Tuple[float, float]:\n    \"\"\"Generate a random point around the given point p, within the given\n    radius.\n\n    Args:\n        p (Tuple[float, float]): Coordinates to use as a starting point.\n        radius (float): Radius within the starting point to generate the random\n            point.\n\n    Returns:\n        Coordinates of the generated random point.\n    \"\"\"\n    rand_x = random.uniform(p[0] - radius, p[0] + radius)\n    rand_y = random.uniform(p[1] - radius, p[1] + radius)\n    return (rand_x, rand_y)\n
"},{"location":"internals/#kebbie.gesture.bernstein_poly","title":"bernstein_poly(i, n, t)","text":"

The Bernstein polynomial of n, i as a function of t.

Taken from : https://stackoverflow.com/a/12644499/9494790

Parameters:

Name Type Description Default i int

i

required n int

n

required t float

t

required

Returns:

Type Description float

The computed value for this polynomial function.

Source code in kebbie/gesture.py
def bernstein_poly(i: int, n: int, t: float) -> float:\n    \"\"\"The Bernstein polynomial of n, i as a function of t.\n\n    Taken from : https://stackoverflow.com/a/12644499/9494790\n\n    Args:\n        i (int): i\n        n (int): n\n        t (float): t\n\n    Returns:\n        The computed value for this polynomial function.\n    \"\"\"\n    return comb(n, i) * (t ** (n - i)) * (1 - t) ** i\n
"},{"location":"internals/#kebbie.gesture.bezier_curve","title":"bezier_curve(control_points, linspace)","text":"

Given a set of control points, return the bezier curve defined by the control points.

See : http://processingjs.nihongoresources.com/bezierinfo/

Taken from : https://stackoverflow.com/a/12644499/9494790

Parameters:

Name Type Description Default control_points List[Tuple[float, float]]

Control points used to generate the bezier curve.

required linspace List[float]

Linspace to use for sampling points across the Bezier curve.

required

Returns:

Type Description Tuple[List[float], List[float]]

Sampled points along the bezier curve.

Source code in kebbie/gesture.py
def bezier_curve(control_points: List[Tuple[float, float]], linspace: List[float]) -> Tuple[List[float], List[float]]:\n    \"\"\"Given a set of control points, return the bezier curve defined by the\n    control points.\n\n    See : http://processingjs.nihongoresources.com/bezierinfo/\n\n    Taken from : https://stackoverflow.com/a/12644499/9494790\n\n    Args:\n        control_points (List[Tuple[float, float]]): Control points used to\n            generate the bezier curve.\n        linspace (List[float]): Linspace to use for sampling points across the\n            Bezier curve.\n\n    Returns:\n        Sampled points along the bezier curve.\n    \"\"\"\n    n_points = len(control_points)\n    x_points = np.array([p[0] for p in control_points])\n    y_points = np.array([p[1] for p in control_points])\n\n    polynomial_array = np.array([bernstein_poly(i, n_points - 1, linspace) for i in range(0, n_points)])\n\n    x_vals = np.dot(x_points, polynomial_array)\n    y_vals = np.dot(y_points, polynomial_array)\n\n    return x_vals, y_vals\n
"},{"location":"internals/#kebbie.gesture.accelerated_linspace","title":"accelerated_linspace(n, acceleration)","text":"

Alternative to np.linspace, instead of giving a range of number evenly distributed, this one is not evenly distributed, and simulate an acceleration at first, and then a deceleration.

Parameters:

Name Type Description Default n int

Number of points to generate in the linspace.

required acceleration float

A number that dictate how constant the acceleration is. The lower, the more S-shape is used.

required

Returns:

Type Description List[float]

Generated points.

Source code in kebbie/gesture.py
def accelerated_linspace(n: int, acceleration: float) -> List[float]:\n    \"\"\"Alternative to np.linspace, instead of giving a range of number evenly\n    distributed, this one is not evenly distributed, and simulate an\n    acceleration at first, and then a deceleration.\n\n    Args:\n        n (int): Number of points to generate in the linspace.\n        acceleration (float): A number that dictate how constant the\n            acceleration is. The lower, the more S-shape is used.\n\n    Returns:\n        Generated points.\n    \"\"\"\n\n    def norm(x):\n        nom = x - x.min()\n        denom = x.max() - x.min()\n        return nom / denom\n\n    def sigmoid(x, k):\n        return 1 / (1 + np.exp(-x / k))\n\n    linspace = np.linspace(-1.0, 1.0, n)\n\n    if n <= 1:\n        return linspace\n    else:\n        return norm(sigmoid(linspace, k=acceleration))\n
"},{"location":"internals/#layoutpy","title":"layout.py","text":"

Module containing the helpers LayoutHelper, useful class to deal with the layout of a keyboard, access key positions, etc...

"},{"location":"internals/#kebbie.layout.KeyInfo","title":"KeyInfo dataclass","text":"

Structure containing all information needed for a given character (key).

Parameters:

Name Type Description Default klayer_id int

Keyboard Layer ID where this key is located.

required width float

Width of the key.

required height float

Height of the key.

required center Tuple[float, float]

Center position (x, y coordinates) of the key.

required Source code in kebbie/layout.py
@dataclass\nclass KeyInfo:\n    \"\"\"Structure containing all information needed for a given character (key).\n\n    Args:\n        klayer_id (int): Keyboard Layer ID where this key is located.\n        width (float): Width of the key.\n        height (float): Height of the key.\n        center (Tuple[float, float]): Center position (x, y coordinates) of the\n            key.\n    \"\"\"\n\n    klayer_id: int\n    width: float\n    height: float\n    center: Tuple[float, float]\n
"},{"location":"internals/#kebbie.layout.Key","title":"Key dataclass","text":"

Structure containing information needed for each key of a given keyboard layer.

Parameters:

Name Type Description Default char str

Character associated with this key.

required bounds Dict[str, float]

Dictionary representing the bounding box of the key. The dictionary should contains the following keys : right, left, top, bottom.

required Source code in kebbie/layout.py
@dataclass\nclass Key:\n    \"\"\"Structure containing information needed for each key of a given keyboard\n    layer.\n\n    Args:\n        char (str): Character associated with this key.\n        bounds (Dict[str, float]): Dictionary representing the bounding box of\n            the key. The dictionary should contains the following keys :\n            `right`, `left`, `top`, `bottom`.\n    \"\"\"\n\n    char: str\n    bounds: Dict[str, float]\n
"},{"location":"internals/#kebbie.layout.LayoutHelper","title":"LayoutHelper","text":"

Small class that represents a Keyboard layout. The goal of this class is to offer some easy-to-use method to deal with a keyboard layout.

Parameters:

Name Type Description Default lang str

Language of the layout to load.

'en-US' custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

None ignore_layers_after Optional[int])

Ignore higher layers of the keyboard layout. If None is given, no layer is ignored.

None Source code in kebbie/layout.py
class LayoutHelper:\n    \"\"\"Small class that represents a Keyboard layout. The goal of this class is\n    to offer some easy-to-use method to deal with a keyboard layout.\n\n    Args:\n        lang (str, optional): Language of the layout to load.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        ignore_layers_after (Optional[int]) : Ignore higher layers of the\n            keyboard layout. If `None` is given, no layer is ignored.\n    \"\"\"\n\n    def __init__(self, lang: str = \"en-US\", custom_keyboard: Dict = None, ignore_layers_after: Optional[int] = None):\n        keyboard = custom_keyboard if custom_keyboard is not None else load_keyboard(lang)\n        self.keys_info, self.klayers_info, self.accents = self._extract_infos(keyboard[\"layout\"], ignore_layers_after)\n        self.letter_accents = [c for c in self.accents if re.match(r\"^[\\pL]+$\", c)]\n        self.spelling_symbols = keyboard[\"settings\"][\"allowed_symbols_in_words\"]\n        self.layout_name = keyboard[\"keyboard\"][\"default-layout\"]\n\n    def _extract_infos(  # noqa: C901\n        self, keyboard_layout: Dict, ignore_layers_after: Optional[int] = None\n    ) -> Tuple[Dict[str, KeyInfo], Dict[int, Key], List[str]]:\n        \"\"\"This method reads the given keyboard layout, and extract useful data\n        structures from this (to be used later by other methods). This\n        basically builds the LayoutHelper class (and should be used only inside\n        the constructor).\n\n        Note:\n            The given keyboard layout contains 24 layers. Each key appears in\n            one (or several) layer of the keyboard. 
Accents are associated to\n            the same key as their non-accented version.\n            This class may be used to generate typing noise, so accents should\n            have their own keys (and closer accents should be represented by\n            closer keys). This method takes care of it, by generating \"virtual\n            keyboard layers\", for each group of accents. The goal is to\n            generate a virtual keyboard layer that is as close as possible as\n            the actual keyboard, used by real-users.\n\n        Args:\n            keyboard_layout (Dict): Dictionary representing the keyboard and\n                its layout.\n            ignore_layers_after (Optional[int]) : Ignore higher layers of the\n                keyboard layout. If `None` is given, no layer is ignored.\n\n        Returns:\n            Key information for each character in the keyboard.\n            Key information for each layer of the keyboard.\n            List of accents used in the keyboard.\n        \"\"\"\n        keys_info = {}  # Dict char -> key infos (bounds, center, klayer ID)\n        klayers_info = defaultdict(list)  # Dict klayer ID -> list of keys (bounds, char)\n        all_accents = set()\n\n        # A keyboard layout is made of several \"layers\", each identified by a KeyboardID\n        last_klayer_id = len(keyboard_layout)\n        for klayer in keyboard_layout:\n            if klayer[\"buttons\"] is None or (ignore_layers_after is not None and klayer[\"id\"] > ignore_layers_after):\n                continue\n\n            # Each layer is a list of button\n            for button in klayer[\"buttons\"]:\n                # Button always have a character, and optionally accents\n                char, accents = button[\"labels\"][0], button[\"labels\"][1:]\n\n                # Special characters : space, shift, numbers, magic, etc...\n                if button[\"type\"] != 1:\n                    if char.lower() == SPACE:\n                        char = 
\" \"\n                    elif char == POINT:\n                        # Points should be added to our key infos\n                        pass\n                    else:\n                        # Other special characters are ignored\n                        char = None\n\n                if char is None:\n                    continue\n\n                # Save the character and its key information\n                # Save it only if it's not already in a previous klayer\n                if char not in keys_info or keys_info[char].klayer_id > klayer[\"id\"]:\n                    keys_info[char] = KeyInfo(\n                        klayer[\"id\"],\n                        button[\"boundingRect\"][\"right\"] - button[\"boundingRect\"][\"left\"],\n                        button[\"boundingRect\"][\"bottom\"] - button[\"boundingRect\"][\"top\"],\n                        (button[\"centerPoint\"][\"x\"], button[\"centerPoint\"][\"y\"]),\n                    )\n                # But always save its info in the klayers info\n                klayers_info[klayer[\"id\"]].append(Key(char, button[\"boundingRect\"]))\n\n                # Then, save the accents if any\n                for i, char_accent in enumerate(accents):\n                    all_accents.add(char_accent)\n\n                    # Create a virtual position for the accent\n                    bounds, center = self._make_virtual_key(i, button[\"boundingRect\"])\n\n                    # Save the accent (only if not existing) in a new virtual klayer\n                    if char_accent not in keys_info:\n                        keys_info[char_accent] = KeyInfo(\n                            last_klayer_id,\n                            bounds[\"right\"] - bounds[\"left\"],\n                            bounds[\"bottom\"] - bounds[\"top\"],\n                            (center[\"x\"], center[\"y\"]),\n                        )\n                    # But always saveits info in the klayers info\n                    
klayers_info[last_klayer_id].append(Key(char_accent, bounds))\n\n                # If we added some accent in a virtual klayer, don't forget to update the last klayer ID\n                if accents:\n                    last_klayer_id += 1\n\n        return keys_info, klayers_info, sorted(all_accents)\n\n    def _make_virtual_key(\n        self, idx: int, initial_bounds: Dict[str, float]\n    ) -> Tuple[Dict[str, float], Dict[str, float]]:\n        \"\"\"Method to create a new boundary for an accented character. Based on\n        the given id, the generated boundary box will be generated at a\n        different position.\n\n        This method tries to follow a similar pattern as the sample app, with\n        accents appearing in lines of 4 accents.\n\n        Args:\n            idx (int): The index of the bounding box to generate.\n            initial_bounds (Dict[str, float]): The bounding box of the\n                non-accented key.\n\n        Returns:\n            Generated bounding box.\n            Its associated center position.\n        \"\"\"\n        width = initial_bounds[\"right\"] - initial_bounds[\"left\"]\n        height = initial_bounds[\"bottom\"] - initial_bounds[\"top\"]\n\n        start_x = initial_bounds[\"left\"] + (idx % N_ACCENT_PER_LINE) * width\n        start_y = initial_bounds[\"bottom\"] - (idx // N_ACCENT_PER_LINE) * height\n\n        bounds = {\n            \"bottom\": start_y,\n            \"left\": start_x,\n            \"right\": start_x + width,\n            \"top\": start_y - height,\n        }\n        center = {\n            \"x\": bounds[\"left\"] + width / 2,\n            \"y\": bounds[\"top\"] + height / 2,\n        }\n        return bounds, center\n\n    def get_key_info(self, char: str) -> Tuple[float, float, float, float, int]:\n        \"\"\"Method to retrieve the information associated to a specific key.\n\n        Args:\n            char (str): Character for which to retrieve key information.\n\n        Raises:\n       
     KeyError: Exception raised if the given character can't be typed (\n                because it doesn't exist on this keyboard layout).\n\n        Returns:\n            Width of the key for the requested character.\n            Height of the key for the requested character.\n            Center position (x-axis) of the key for the requested character.\n            Center position (y-axis) of the key for the requested character.\n            Keyboard layer ID where the character's key is located.\n        \"\"\"\n        k = self.keys_info[char]\n        return k.width, k.height, k.center[0], k.center[1], k.klayer_id\n\n    def get_key(self, pos: Tuple[float, float], klayer_id: int) -> str:\n        \"\"\"Get the character associated with the given position.\n\n        Args:\n            pos (Tuple[float, float]): Position (x, y) in the keyboard.\n            klayer_id (int): Keyboard layer ID to use.\n\n        Returns:\n            Character associated to the given position.\n        \"\"\"\n        klayer = self.klayers_info[klayer_id]\n\n        try:\n            # Retrieve the key that contains the sampled position\n            key = next(\n                k\n                for k in klayer\n                if k.bounds[\"left\"] <= pos[0] <= k.bounds[\"right\"] and k.bounds[\"top\"] <= pos[1] <= k.bounds[\"bottom\"]\n            )\n        except StopIteration:\n            # Maybe the sampled position was out of bound -> retrieve the closest key\n            key = min(\n                klayer,\n                key=lambda k: euclidian_dist(\n                    pos,\n                    (\n                        k.bounds[\"left\"] + (k.bounds[\"right\"] - k.bounds[\"left\"]) / 2,\n                        k.bounds[\"top\"] + (k.bounds[\"bottom\"] - k.bounds[\"top\"]) / 2,\n                    ),\n                ),\n            )\n\n        return key.char\n
"},{"location":"internals/#kebbie.layout.LayoutHelper._extract_infos","title":"_extract_infos(keyboard_layout, ignore_layers_after=None)","text":"

This method reads the given keyboard layout, and extract useful data structures from this (to be used later by other methods). This basically builds the LayoutHelper class (and should be used only inside the constructor).

Note

The given keyboard layout contains 24 layers. Each key appears in one (or several) layer of the keyboard. Accents are associated to the same key as their non-accented version. This class may be used to generate typing noise, so accents should have their own keys (and closer accents should be represented by closer keys). This method takes care of it, by generating \"virtual keyboard layers\", for each group of accents. The goal is to generate a virtual keyboard layer that is as close as possible as the actual keyboard, used by real-users.

Parameters:

Name Type Description Default keyboard_layout Dict

Dictionary representing the keyboard and its layout.

required ignore_layers_after Optional[int])

Ignore higher layers of the keyboard layout. If None is given, no layer is ignored.

None

Returns:

Type Description Dict[str, KeyInfo]

Key information for each character in the keyboard.

Dict[int, Key]

Key information for each layer of the keyboard.

List[str]

List of accents used in the keyboard.

Source code in kebbie/layout.py
def _extract_infos(  # noqa: C901\n    self, keyboard_layout: Dict, ignore_layers_after: Optional[int] = None\n) -> Tuple[Dict[str, KeyInfo], Dict[int, Key], List[str]]:\n    \"\"\"This method reads the given keyboard layout, and extract useful data\n    structures from this (to be used later by other methods). This\n    basically builds the LayoutHelper class (and should be used only inside\n    the constructor).\n\n    Note:\n        The given keyboard layout contains 24 layers. Each key appears in\n        one (or several) layer of the keyboard. Accents are associated to\n        the same key as their non-accented version.\n        This class may be used to generate typing noise, so accents should\n        have their own keys (and closer accents should be represented by\n        closer keys). This method takes care of it, by generating \"virtual\n        keyboard layers\", for each group of accents. The goal is to\n        generate a virtual keyboard layer that is as close as possible as\n        the actual keyboard, used by real-users.\n\n    Args:\n        keyboard_layout (Dict): Dictionary representing the keyboard and\n            its layout.\n        ignore_layers_after (Optional[int]) : Ignore higher layers of the\n            keyboard layout. 
If `None` is given, no layer is ignored.\n\n    Returns:\n        Key information for each character in the keyboard.\n        Key information for each layer of the keyboard.\n        List of accents used in the keyboard.\n    \"\"\"\n    keys_info = {}  # Dict char -> key infos (bounds, center, klayer ID)\n    klayers_info = defaultdict(list)  # Dict klayer ID -> list of keys (bounds, char)\n    all_accents = set()\n\n    # A keyboard layout is made of several \"layers\", each identified by a KeyboardID\n    last_klayer_id = len(keyboard_layout)\n    for klayer in keyboard_layout:\n        if klayer[\"buttons\"] is None or (ignore_layers_after is not None and klayer[\"id\"] > ignore_layers_after):\n            continue\n\n        # Each layer is a list of button\n        for button in klayer[\"buttons\"]:\n            # Button always have a character, and optionally accents\n            char, accents = button[\"labels\"][0], button[\"labels\"][1:]\n\n            # Special characters : space, shift, numbers, magic, etc...\n            if button[\"type\"] != 1:\n                if char.lower() == SPACE:\n                    char = \" \"\n                elif char == POINT:\n                    # Points should be added to our key infos\n                    pass\n                else:\n                    # Other special characters are ignored\n                    char = None\n\n            if char is None:\n                continue\n\n            # Save the character and its key information\n            # Save it only if it's not already in a previous klayer\n            if char not in keys_info or keys_info[char].klayer_id > klayer[\"id\"]:\n                keys_info[char] = KeyInfo(\n                    klayer[\"id\"],\n                    button[\"boundingRect\"][\"right\"] - button[\"boundingRect\"][\"left\"],\n                    button[\"boundingRect\"][\"bottom\"] - button[\"boundingRect\"][\"top\"],\n                    (button[\"centerPoint\"][\"x\"], 
button[\"centerPoint\"][\"y\"]),\n                )\n            # But always save its info in the klayers info\n            klayers_info[klayer[\"id\"]].append(Key(char, button[\"boundingRect\"]))\n\n            # Then, save the accents if any\n            for i, char_accent in enumerate(accents):\n                all_accents.add(char_accent)\n\n                # Create a virtual position for the accent\n                bounds, center = self._make_virtual_key(i, button[\"boundingRect\"])\n\n                # Save the accent (only if not existing) in a new virtual klayer\n                if char_accent not in keys_info:\n                    keys_info[char_accent] = KeyInfo(\n                        last_klayer_id,\n                        bounds[\"right\"] - bounds[\"left\"],\n                        bounds[\"bottom\"] - bounds[\"top\"],\n                        (center[\"x\"], center[\"y\"]),\n                    )\n                # But always saveits info in the klayers info\n                klayers_info[last_klayer_id].append(Key(char_accent, bounds))\n\n            # If we added some accent in a virtual klayer, don't forget to update the last klayer ID\n            if accents:\n                last_klayer_id += 1\n\n    return keys_info, klayers_info, sorted(all_accents)\n
"},{"location":"internals/#kebbie.layout.LayoutHelper._make_virtual_key","title":"_make_virtual_key(idx, initial_bounds)","text":"

Method to create a new boundary for an accented character. Based on the given id, the generated boundary box will be generated at a different position.

This method tries to follow a similar pattern as the sample app, with accents appearing in lines of 4 accents.

Parameters:

Name Type Description Default idx int

The index of the bounding box to generate.

required initial_bounds Dict[str, float]

The bounding box of the non-accented key.

required

Returns:

Type Description Dict[str, float]

Generated bounding box.

Dict[str, float]

Its associated center position.

Source code in kebbie/layout.py
def _make_virtual_key(\n    self, idx: int, initial_bounds: Dict[str, float]\n) -> Tuple[Dict[str, float], Dict[str, float]]:\n    \"\"\"Method to create a new boundary for an accented character. Based on\n    the given id, the generated boundary box will be generated at a\n    different position.\n\n    This method tries to follow a similar pattern as the sample app, with\n    accents appearing in lines of 4 accents.\n\n    Args:\n        idx (int): The index of the bounding box to generate.\n        initial_bounds (Dict[str, float]): The bounding box of the\n            non-accented key.\n\n    Returns:\n        Generated bounding box.\n        Its associated center position.\n    \"\"\"\n    width = initial_bounds[\"right\"] - initial_bounds[\"left\"]\n    height = initial_bounds[\"bottom\"] - initial_bounds[\"top\"]\n\n    start_x = initial_bounds[\"left\"] + (idx % N_ACCENT_PER_LINE) * width\n    start_y = initial_bounds[\"bottom\"] - (idx // N_ACCENT_PER_LINE) * height\n\n    bounds = {\n        \"bottom\": start_y,\n        \"left\": start_x,\n        \"right\": start_x + width,\n        \"top\": start_y - height,\n    }\n    center = {\n        \"x\": bounds[\"left\"] + width / 2,\n        \"y\": bounds[\"top\"] + height / 2,\n    }\n    return bounds, center\n
"},{"location":"internals/#kebbie.layout.LayoutHelper.get_key_info","title":"get_key_info(char)","text":"

Method to retrieve the information associated to a specific key.

Parameters:

Name Type Description Default char str

Character for which to retrieve key information.

required

Raises:

Type Description KeyError

Exception raised if the given character can't be typed ( because it doesn't exist on this keyboard layout).

Returns:

Type Description float

Width of the key for the requested character.

float

Height of the key for the requested character.

float

Center position (x-axis) of the key for the requested character.

float

Center position (y-axis) of the key for the requested character.

int

Keyboard layer ID where the character's key is located.

Source code in kebbie/layout.py
def get_key_info(self, char: str) -> Tuple[float, float, float, float, int]:\n    \"\"\"Method to retrieve the information associated to a specific key.\n\n    Args:\n        char (str): Character for which to retrieve key information.\n\n    Raises:\n        KeyError: Exception raised if the given character can't be typed (\n            because it doesn't exist on this keyboard layout).\n\n    Returns:\n        Width of the key for the requested character.\n        Height of the key for the requested character.\n        Center position (x-axis) of the key for the requested character.\n        Center position (y-axis) of the key for the requested character.\n        Keyboard layer ID where the character's key is located.\n    \"\"\"\n    k = self.keys_info[char]\n    return k.width, k.height, k.center[0], k.center[1], k.klayer_id\n
"},{"location":"internals/#kebbie.layout.LayoutHelper.get_key","title":"get_key(pos, klayer_id)","text":"

Get the character associated with the given position.

Parameters:

Name Type Description Default pos Tuple[float, float]

Position (x, y) in the keyboard.

required klayer_id int

Keyboard layer ID to use.

required

Returns:

Type Description str

Character associated to the given position.

Source code in kebbie/layout.py
def get_key(self, pos: Tuple[float, float], klayer_id: int) -> str:\n    \"\"\"Get the character associated with the given position.\n\n    Args:\n        pos (Tuple[float, float]): Position (x, y) in the keyboard.\n        klayer_id (int): Keyboard layer ID to use.\n\n    Returns:\n        Character associated to the given position.\n    \"\"\"\n    klayer = self.klayers_info[klayer_id]\n\n    try:\n        # Retrieve the key that contains the sampled position\n        key = next(\n            k\n            for k in klayer\n            if k.bounds[\"left\"] <= pos[0] <= k.bounds[\"right\"] and k.bounds[\"top\"] <= pos[1] <= k.bounds[\"bottom\"]\n        )\n    except StopIteration:\n        # Maybe the sampled position was out of bound -> retrieve the closest key\n        key = min(\n            klayer,\n            key=lambda k: euclidian_dist(\n                pos,\n                (\n                    k.bounds[\"left\"] + (k.bounds[\"right\"] - k.bounds[\"left\"]) / 2,\n                    k.bounds[\"top\"] + (k.bounds[\"bottom\"] - k.bounds[\"top\"]) / 2,\n                ),\n            ),\n        )\n\n    return key.char\n
"},{"location":"internals/#noise_modelpy","title":"noise_model.py","text":"

Module defining the NoiseModel class, which takes care of introducing typos in a clean text (and later see if the model can properly correct these typos).

"},{"location":"internals/#kebbie.noise_model.Typo","title":"Typo","text":"

Bases: Enum

Enum listing all possible typos that can be introduced.

Source code in kebbie/noise_model.py
class Typo(Enum):\n    \"\"\"Enum listing all possible typos that can be introduced.\"\"\"\n\n    # Deletions\n    DELETE_SPELLING_SYMBOL = \"DELETE_SPELLING_SYMBOL\"\n    DELETE_SPACE = \"DELETE_SPACE\"\n    DELETE_PUNCTUATION = \"DELETE_PUNCTUATION\"\n    DELETE_CHAR = \"DELETE_CHAR\"\n\n    # Additions\n    ADD_SPELLING_SYMBOL = \"ADD_SPELLING_SYMBOL\"\n    ADD_SPACE = \"ADD_SPACE\"\n    ADD_PUNCTUATION = \"ADD_PUNCTUATION\"\n    ADD_CHAR = \"ADD_CHAR\"\n\n    # Substitutions\n    SUBSTITUTE_CHAR = \"SUBSTITUTE_CHAR\"\n\n    # Simplifications\n    SIMPLIFY_ACCENT = \"SIMPLIFY_ACCENT\"\n    SIMPLIFY_CASE = \"SIMPLIFY_CASE\"\n\n    # Transposition\n    TRANSPOSE_CHAR = \"TRANSPOSE_CHAR\"\n\n    # Common typos\n    COMMON_TYPO = \"COMMON_TYPO\"\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel","title":"NoiseModel","text":"

Class responsible for introducing typo in a clean text.

Most of typos are introduced on text directly. Then fuzzy typing is applied, using two Gaussian distributions (for x-axis and y-axis), mimicking a user typing on a soft keyboard.

The ratio arguments are here to choose how wide the Gaussian distribution is. A wider distribution will be less precise, a narrower distribution will be more precise. To test how wide a ratio is, run the following code :

from scipy.stats import norm\n\ndef compute(x):\n    cdf = norm.cdf(x)\n    return cdf - (1 - cdf)\n\nprint(compute(2.32))    # >>> 0.9796591226625606\n
So in this case, a ratio of 2.32 gives a precision of ~98% (a typo will be introduced in 2% of the cases).

Parameters:

Name Type Description Default lang str

Language used.

required custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

None common_typos Optional[Dict[str, List[str]]]

Dictionary of common typos. If None, common typos are not used.

None typo_probs Optional[Dict[str, float]]

Probabilities for each type of typos. If None is given, DEFAULT_TYPO_PROBS is used.

None x_offset float

Parameter for the Gaussian distribution for the fuzzy typing. Base position offset on the x-axis.

0 y_offset float

Parameter for the Gaussian distribution for the fuzzy typing. Base position offset on the y-axis.

0 x_ratio float

Parameter for the Gaussian distribution for the fuzzy typing. It controls how wide the distribution is on the x-axis, which is the precision of the typing.

DEFAULT_SIGMA_RATIO y_ratio float

Parameter for the Gaussian distribution for the fuzzy typing. It controls how wide the distribution is on the y-axis, which is the precision of the typing.

DEFAULT_SIGMA_RATIO Source code in kebbie/noise_model.py
class NoiseModel:\n    \"\"\"Class responsible for introducing typo in a clean text.\n\n    Most of typos are introduced on text directly. Then fuzzy typing is\n    applied, using two Gaussian distributions (for x-axis and y-axis),\n    mimicking a user typing on a soft keyboard.\n\n    The ratio arguments are here to choose how wide the Gaussian distribution\n    is. A wider distribution will be less precise, a narrower distribution will\n    be more precise. To test how wide a ratio is, run the following code :\n    ```\n    from scipy.stats import norm\n\n    def compute(x):\n        cdf = norm.cdf(x)\n        return cdf - (1 - cdf)\n\n    print(compute(2.32))    # >>> 0.9796591226625606\n    ```\n    So in this case, a ratio of `2.32` gives a precision of ~98% (a typo will\n    be introduced in 2% of the cases).\n\n    Args:\n        lang (str): Language used.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        common_typos (Optional[Dict[str, List[str]]], optional): Dictionary of\n            common typos. If `None`, common typos are not used.\n        typo_probs (Optional[Dict[str, float]], optional): Probabilities for\n            each type of typos. If `None` is given, `DEFAULT_TYPO_PROBS` is\n            used.\n        x_offset (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. Base position offset on the x-axis.\n        y_offset (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. Base position offset on the y-axis.\n        x_ratio (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. It controls how wide the distribution is on the\n            x-axis, which is the precision of the typing.\n        y_ratio (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. 
It controls how wide the distribution is on the\n            y-axis, which is the precision of the typing.\n    \"\"\"\n\n    def __init__(\n        self,\n        lang: str,\n        custom_keyboard: Dict = None,\n        common_typos: Optional[Dict[str, List[str]]] = None,\n        typo_probs: Optional[Dict[str, float]] = None,\n        x_offset: float = 0,\n        y_offset: float = 0,\n        x_ratio: float = DEFAULT_SIGMA_RATIO,\n        y_ratio: float = DEFAULT_SIGMA_RATIO,\n    ):\n        self.lang = lang\n        self.x_offset, self.y_offset = x_offset, y_offset\n        self.x_ratio, self.y_ratio = x_ratio, y_ratio\n        self.klayout = LayoutHelper(self.lang, custom_keyboard=custom_keyboard, ignore_layers_after=3)\n        self.probs = typo_probs if typo_probs is not None else DEFAULT_TYPO_PROBS\n        self.common_typos = common_typos if common_typos is not None else self._get_common_typos()\n\n    def type_till_space(\n        self,\n        words: List[str],\n    ) -> Tuple[\n        List[Optional[Tuple[float, float]]],\n        str,\n        int,\n        List[Typo],\n    ]:\n        \"\"\"Method introducing typos word by word.\n\n        This method receives a list of words, and type these words while\n        introducing typos.\n        So most of the time, only one word will be typed and the method will\n        return. In some cases, the space is mistyped or deleted, so two words\n        are typed.\n\n        Args:\n            words (List[str]): List of words to type.\n\n        Returns:\n            List of keystrokes (may contains some None).\n            The typed characters as string.\n            The number of words typed.\n            The list of typos introduced in the string typed.\n        \"\"\"\n        all_keystrokes = []\n        all_typed_char = \"\"\n        all_typos = []\n\n        for i, word in enumerate(words):\n            # Some words can't be corrected (numbers, symbols, etc...) 
-> Don't introduce typos\n            error_free = False if self._is_correctable(word) else True\n\n            # Add typos in the word\n            noisy_word, typos = self._introduce_typos(word, error_free=error_free)\n            all_typos += typos\n\n            # Type the word (fuzzy)\n            keystrokes, typed_char, typos = self._fuzzy_type(noisy_word, error_free=error_free)\n            all_keystrokes += keystrokes\n            all_typed_char += typed_char\n            all_typos += typos\n\n            # Then, we try to type a space (separator between words)\n            # TODO : Modify this part for languages without space\n            noisy_space, sp_typo_1 = self._introduce_typos(SPACE)\n            keystrokes, typed_char, sp_typo_2 = self._fuzzy_type(noisy_space)\n\n            # If the space is correctly typed, return now, otherwise type the next word\n            if not sp_typo_1 and not sp_typo_2:\n                break\n            else:\n                all_keystrokes += keystrokes\n                all_typed_char += typed_char\n                all_typos += sp_typo_1 + sp_typo_2\n\n        return all_keystrokes, all_typed_char, i + 1, all_typos\n\n    def swipe(self, word: str) -> Optional[List[Tuple[float, float]]]:\n        \"\"\"Method for creating an artificial swipe gesture given a word.\n\n        Args:\n            word (str): Word to type with a swipe gesture.\n\n        Returns:\n            Positions (x, y) of the generated swipe gesture, or None if the\n                swipe gesture couldn't be created.\n        \"\"\"\n        # Some words can't be corrected (numbers, symbols, etc...) 
-> Don't introduce typos\n        error_free = False if self._is_correctable(word) else True\n\n        # Get the core keystrokes (fuzzy)\n        keystrokes, *_ = self._fuzzy_type(word, error_free=error_free)\n\n        # If we can swipe that word, create the corresponding artificial gesture\n        if all(keystrokes) and len(keystrokes) > 1:\n            return make_swipe_gesture(keystrokes)\n        else:\n            return None\n\n    def _introduce_typos(self, word: str, error_free: bool = False) -> Tuple[str, List[Typo]]:  # noqa: C901\n        \"\"\"Method to introduce typos in a given string.\n\n        Either the word is changed into an existing common typo, or the word is\n        processed as a stream of characters, each character having a chance of\n        being mistyped.\n        This method only add regular typos (deletions, additions, etc...), and\n        is not introducing fuzzy typing.\n\n        Args:\n            word (str): Clean string where to add typos.\n            error_free (bool): If set to True, don't introduce typo. 
Defaults\n                to False.\n\n        Returns:\n            The noisy string.\n            The list of typos introduced.\n        \"\"\"\n        if error_free:\n            return word, []\n\n        # First of all, we either consider the word as a unit and introduce a\n        # language-specific common typo (if available), or treat the word as a\n        # sequence of character, where each character can have a typo\n        if word in self.common_typos and sample(self.probs[Typo.COMMON_TYPO]):\n            # Introduce a common typo\n            return random.choice(self.common_typos[word]), [Typo.COMMON_TYPO]\n\n        # From here, treat the word as a stream of characters, and potentially\n        # add typos for each character\n        noisy_word = \"\"\n        typos = []\n        word_chars = list(word)\n        for i, char in enumerate(word_chars):\n            # First, potentially apply simplifications (removing accent, or\n            # lowercasing an uppercase character)\n            # Note that if the full word is uppercase, we don't apply lowercase\n            # simplification (doesn't feel like a natural typo a user would do)\n            if char in self.klayout.letter_accents and sample(self.probs[Typo.SIMPLIFY_ACCENT]):\n                char = strip_accents(char)\n                typos.append(Typo.SIMPLIFY_ACCENT)\n            if char.isupper() and len(word) > 1 and not word.isupper() and sample(self.probs[Typo.SIMPLIFY_CASE]):\n                char = char.lower()\n                typos.append(Typo.SIMPLIFY_CASE)\n\n            # Check if this character exists on our keyboard\n            try:\n                *_, klayer_id = self.klayout.get_key_info(char)\n                char_is_on_kb = True\n                char_is_on_default_kb = klayer_id == 0\n            except KeyError:\n                char_is_on_kb = char_is_on_default_kb = False\n\n            # Then, add the possible typo depending on the character type\n            events = 
[]\n            is_first_char = bool(i == 0)\n            is_last_char = bool(i >= (len(word_chars) - 1))\n            if char.isnumeric() or not char_is_on_kb:\n                # Don't introduce typos for numbers or symbols that are not on keyboard\n                pass\n            else:\n                if not is_last_char:\n                    # Only transpose char if they are on the same keyboard layer\n                    try:\n                        *_, next_char_klayer_id = self.klayout.get_key_info(word[i + 1])\n                    except KeyError:\n                        next_char_klayer_id = None\n\n                    if klayer_id == next_char_klayer_id:\n                        events.append(Typo.TRANSPOSE_CHAR)\n                if char in self.klayout.spelling_symbols:\n                    events.append(Typo.DELETE_SPELLING_SYMBOL)\n                    events.append(Typo.ADD_SPELLING_SYMBOL)\n                elif char.isspace():\n                    events.append(Typo.DELETE_SPACE)\n                    events.append(Typo.ADD_SPACE)\n                elif char in string.punctuation:\n                    events.append(Typo.DELETE_PUNCTUATION)\n                    events.append(Typo.ADD_PUNCTUATION)\n                elif char_is_on_default_kb:\n                    events.append(Typo.DELETE_CHAR)\n                    events.append(Typo.ADD_CHAR)\n\n            # If it's the last character (and we are not typing a space),\n            # don't add deletions typos, because it's an auto-completion case,\n            # not auto-correction\n            if is_last_char and word != SPACE:\n                events = [e for e in events if e not in DELETIONS]\n\n            # Get the probabilities for these possible events\n            typo_probs = {e: self.probs[e] for e in events}\n            if is_first_char:\n                # Deleting the first character of the word is not so common, update the probabilities accordingly\n                typo_probs = {e: p * 
FRONT_DELETION_MULTIPLIER if e in DELETIONS else p for e, p in typo_probs.items()}\n\n            # And sample one of them\n            typo = sample_among(typo_probs)\n\n            # Process the typo\n            if typo is Typo.TRANSPOSE_CHAR:\n                noisy_char = word_chars[i + 1]\n                word_chars[i + 1] = char\n            elif typo in [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]:\n                noisy_char = \"\"\n            elif typo in [Typo.ADD_SPELLING_SYMBOL, Typo.ADD_SPACE, Typo.ADD_PUNCTUATION, Typo.ADD_CHAR]:\n                noisy_char = f\"{char}{char}\"\n            else:  # No typo\n                noisy_char = char\n\n            noisy_word += noisy_char\n            if typo is not None:\n                typos.append(typo)\n\n        return noisy_word, typos\n\n    def _fuzzy_type(\n        self, word: str, error_free: bool = False\n    ) -> Tuple[List[Optional[Tuple[float, float]]], str, List[Typo]]:\n        \"\"\"Method adding fuzzy typing.\n\n        This method takes a string (potentially already noisy from other type\n        of typos), and fuzzy-type it : simulate a user on a soft-keyboard.\n        This \"fat-finger syndrom\" is simulated using two Gaussian\n        distributions, one for each axis (x, y).\n        This method also returns the generated keystrokes (positions on the\n        keyboard), but only for the default keyboard (ID = 0). Keystrokes from\n        other keyboard are set to None.\n\n        Args:\n            word (str): String to fuzzy-type.\n            error_free (bool): If set to True, don't introduce typo. 
Defaults\n                to False.\n\n        Returns:\n            List of keystrokes.\n            Fuzzy string (corresponding to the keystrokes).\n            List of typos introduced.\n        \"\"\"\n        fuzzy_word = \"\"\n        keystrokes = []\n        typos = []\n\n        # Type word character by character\n        for char in word:\n            try:\n                width, height, x_center, y_center, klayer_id = self.klayout.get_key_info(char)\n            except KeyError:\n                # This character doesn't exist on the current keyboard\n                # Just type it without introducing typo, like if the user copy-pasted it\n                keystrokes.append(None)\n                fuzzy_word += char\n                continue\n\n            # Sample a keystroke for this character\n            # Note that we don't generate typos for characters outside of the default keyboard\n            if error_free or klayer_id != 0:\n                keystroke = (x_center, y_center)\n            else:\n                # Compute mu and sigma for the Normal distribution\n                x_mu = x_center + self.x_offset\n                y_mu = y_center + self.y_offset\n                x_sigma = (width / 2) / self.x_ratio\n                y_sigma = (height / 2) / self.y_ratio\n\n                # Sample a position (x and y)\n                keystroke = (random.gauss(x_mu, x_sigma), random.gauss(y_mu, y_sigma))\n\n            # Convert it back to a character, to see where we tapped\n            fuzzy_char = self.klayout.get_key(keystroke, klayer_id)\n\n            # Save it (save the keystroke only if part of the default keyboard)\n            keystrokes.append(keystroke if klayer_id == 0 else None)\n            fuzzy_word += fuzzy_char\n            if fuzzy_char != char:\n                typos.append(Typo.SUBSTITUTE_CHAR)\n\n        return keystrokes, fuzzy_word, typos\n\n    def _is_correctable(self, word: str) -> bool:\n        \"\"\"Method returning True if 
we expect the given word to be corrected\n        upon typo introduction, False otherwise.\n\n        This is necessary to ensure we don't introduce typos in words that\n        can't be corrected, because if we do, it will be counted as error.\n\n        For now, are considered non-correctable :\n         * Words that don't contains any letter (from Unicode standard)\n\n        Args:\n            word (str): Word to classify as correctable or not.\n\n        Returns:\n            True if the word is correctable (and therefore we can introduce\n            typo), False otherwise.\n        \"\"\"\n        # Use the Unicode category `L` (see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category)\n        return not bool(re.match(r\"^[^\\pL]+$\", word))\n\n    def _get_common_typos(self) -> Dict[str, List[str]]:\n        \"\"\"Retrieve the list (if it exists) of plausible common typos to use\n        when introducing typos.\n\n        Returns:\n            Dictionary where the keys are the correct words and the values are\n                the associated possible typos for this word.\n        \"\"\"\n        plang = self.lang.split(\"-\")[0]\n        common_typos_cache_file = os.path.join(CACHE_DIR, f\"{plang}.json\")\n\n        # Try to access the cached common typos, and if it fails, it means we\n        # don't have it locally\n        try:\n            with open(common_typos_cache_file, \"r\") as f:\n                return json.load(f)\n        except FileNotFoundError:\n            pass\n\n        # File is not cached, download & process the common typos from online\n        os.makedirs(os.path.dirname(common_typos_cache_file), exist_ok=True)\n        typos = defaultdict(list)\n        if plang == \"en\":\n            response = requests.get(TWEET_TYPO_CORPUS_URL)\n            for line in response.text.strip().split(\"\\n\"):\n                typoed_word, correct_word, *_ = line.split(\"\\t\")\n                
typos[correct_word].append(typoed_word)\n        else:\n            return {}\n\n        # Save the retrieved typos in cache\n        with open(common_typos_cache_file, \"w\") as f:\n            json.dump(typos, f, indent=4)\n\n        return typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel.type_till_space","title":"type_till_space(words)","text":"

Method introducing typos word by word.

This method receives a list of words, and type these words while introducing typos. So most of the time, only one word will be typed and the method will return. In some cases, the space is mistyped or deleted, so two words are typed.

Parameters:

Name Type Description Default words List[str]

List of words to type.

required

Returns:

Type Description List[Optional[Tuple[float, float]]]

List of keystrokes (may contains some None).

str

The typed characters as string.

int

The number of words typed.

List[Typo]

The list of typos introduced in the string typed.

Source code in kebbie/noise_model.py
def type_till_space(\n    self,\n    words: List[str],\n) -> Tuple[\n    List[Optional[Tuple[float, float]]],\n    str,\n    int,\n    List[Typo],\n]:\n    \"\"\"Method introducing typos word by word.\n\n    This method receives a list of words, and type these words while\n    introducing typos.\n    So most of the time, only one word will be typed and the method will\n    return. In some cases, the space is mistyped or deleted, so two words\n    are typed.\n\n    Args:\n        words (List[str]): List of words to type.\n\n    Returns:\n        List of keystrokes (may contains some None).\n        The typed characters as string.\n        The number of words typed.\n        The list of typos introduced in the string typed.\n    \"\"\"\n    all_keystrokes = []\n    all_typed_char = \"\"\n    all_typos = []\n\n    for i, word in enumerate(words):\n        # Some words can't be corrected (numbers, symbols, etc...) -> Don't introduce typos\n        error_free = False if self._is_correctable(word) else True\n\n        # Add typos in the word\n        noisy_word, typos = self._introduce_typos(word, error_free=error_free)\n        all_typos += typos\n\n        # Type the word (fuzzy)\n        keystrokes, typed_char, typos = self._fuzzy_type(noisy_word, error_free=error_free)\n        all_keystrokes += keystrokes\n        all_typed_char += typed_char\n        all_typos += typos\n\n        # Then, we try to type a space (separator between words)\n        # TODO : Modify this part for languages without space\n        noisy_space, sp_typo_1 = self._introduce_typos(SPACE)\n        keystrokes, typed_char, sp_typo_2 = self._fuzzy_type(noisy_space)\n\n        # If the space is correctly typed, return now, otherwise type the next word\n        if not sp_typo_1 and not sp_typo_2:\n            break\n        else:\n            all_keystrokes += keystrokes\n            all_typed_char += typed_char\n            all_typos += sp_typo_1 + sp_typo_2\n\n    return all_keystrokes, 
all_typed_char, i + 1, all_typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel.swipe","title":"swipe(word)","text":"

Method for creating an artificial swipe gesture given a word.

Parameters:

Name Type Description Default word str

Word to type with a swipe gesture.

required

Returns:

Type Description Optional[List[Tuple[float, float]]]

Positions (x, y) of the generated swipe gesture, or None if the swipe gesture couldn't be created.

Source code in kebbie/noise_model.py
def swipe(self, word: str) -> Optional[List[Tuple[float, float]]]:\n    \"\"\"Method for creating an artificial swipe gesture given a word.\n\n    Args:\n        word (str): Word to type with a swipe gesture.\n\n    Returns:\n        Positions (x, y) of the generated swipe gesture, or None if the\n            swipe gesture couldn't be created.\n    \"\"\"\n    # Some words can't be corrected (numbers, symbols, etc...) -> Don't introduce typos\n    error_free = False if self._is_correctable(word) else True\n\n    # Get the core keystrokes (fuzzy)\n    keystrokes, *_ = self._fuzzy_type(word, error_free=error_free)\n\n    # If we can swipe that word, create the corresponding artificial gesture\n    if all(keystrokes) and len(keystrokes) > 1:\n        return make_swipe_gesture(keystrokes)\n    else:\n        return None\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._introduce_typos","title":"_introduce_typos(word, error_free=False)","text":"

Method to introduce typos in a given string.

Either the word is changed into an existing common typo, or the word is processed as a stream of characters, each character having a chance of being mistyped. This method only adds regular typos (deletions, additions, etc...), and does not introduce fuzzy typing.

Parameters:

Name Type Description Default word str

Clean string where to add typos.

required error_free bool

If set to True, don't introduce typo. Defaults to False.

False

Returns:

Type Description str

The noisy string.

List[Typo]

The list of typos introduced.

Source code in kebbie/noise_model.py
def _introduce_typos(self, word: str, error_free: bool = False) -> Tuple[str, List[Typo]]:  # noqa: C901\n    \"\"\"Method to introduce typos in a given string.\n\n    Either the word is changed into an existing common typo, or the word is\n    processed as a stream of characters, each character having a chance of\n    being mistyped.\n    This method only add regular typos (deletions, additions, etc...), and\n    is not introducing fuzzy typing.\n\n    Args:\n        word (str): Clean string where to add typos.\n        error_free (bool): If set to True, don't introduce typo. Defaults\n            to False.\n\n    Returns:\n        The noisy string.\n        The list of typos introduced.\n    \"\"\"\n    if error_free:\n        return word, []\n\n    # First of all, we either consider the word as a unit and introduce a\n    # language-specific common typo (if available), or treat the word as a\n    # sequence of character, where each character can have a typo\n    if word in self.common_typos and sample(self.probs[Typo.COMMON_TYPO]):\n        # Introduce a common typo\n        return random.choice(self.common_typos[word]), [Typo.COMMON_TYPO]\n\n    # From here, treat the word as a stream of characters, and potentially\n    # add typos for each character\n    noisy_word = \"\"\n    typos = []\n    word_chars = list(word)\n    for i, char in enumerate(word_chars):\n        # First, potentially apply simplifications (removing accent, or\n        # lowercasing an uppercase character)\n        # Note that if the full word is uppercase, we don't apply lowercase\n        # simplification (doesn't feel like a natural typo a user would do)\n        if char in self.klayout.letter_accents and sample(self.probs[Typo.SIMPLIFY_ACCENT]):\n            char = strip_accents(char)\n            typos.append(Typo.SIMPLIFY_ACCENT)\n        if char.isupper() and len(word) > 1 and not word.isupper() and sample(self.probs[Typo.SIMPLIFY_CASE]):\n            char = char.lower()\n            
typos.append(Typo.SIMPLIFY_CASE)\n\n        # Check if this character exists on our keyboard\n        try:\n            *_, klayer_id = self.klayout.get_key_info(char)\n            char_is_on_kb = True\n            char_is_on_default_kb = klayer_id == 0\n        except KeyError:\n            char_is_on_kb = char_is_on_default_kb = False\n\n        # Then, add the possible typo depending on the character type\n        events = []\n        is_first_char = bool(i == 0)\n        is_last_char = bool(i >= (len(word_chars) - 1))\n        if char.isnumeric() or not char_is_on_kb:\n            # Don't introduce typos for numbers or symbols that are not on keyboard\n            pass\n        else:\n            if not is_last_char:\n                # Only transpose char if they are on the same keyboard layer\n                try:\n                    *_, next_char_klayer_id = self.klayout.get_key_info(word[i + 1])\n                except KeyError:\n                    next_char_klayer_id = None\n\n                if klayer_id == next_char_klayer_id:\n                    events.append(Typo.TRANSPOSE_CHAR)\n            if char in self.klayout.spelling_symbols:\n                events.append(Typo.DELETE_SPELLING_SYMBOL)\n                events.append(Typo.ADD_SPELLING_SYMBOL)\n            elif char.isspace():\n                events.append(Typo.DELETE_SPACE)\n                events.append(Typo.ADD_SPACE)\n            elif char in string.punctuation:\n                events.append(Typo.DELETE_PUNCTUATION)\n                events.append(Typo.ADD_PUNCTUATION)\n            elif char_is_on_default_kb:\n                events.append(Typo.DELETE_CHAR)\n                events.append(Typo.ADD_CHAR)\n\n        # If it's the last character (and we are not typing a space),\n        # don't add deletions typos, because it's an auto-completion case,\n        # not auto-correction\n        if is_last_char and word != SPACE:\n            events = [e for e in events if e not in DELETIONS]\n\n    
    # Get the probabilities for these possible events\n        typo_probs = {e: self.probs[e] for e in events}\n        if is_first_char:\n            # Deleting the first character of the word is not so common, update the probabilities accordingly\n            typo_probs = {e: p * FRONT_DELETION_MULTIPLIER if e in DELETIONS else p for e, p in typo_probs.items()}\n\n        # And sample one of them\n        typo = sample_among(typo_probs)\n\n        # Process the typo\n        if typo is Typo.TRANSPOSE_CHAR:\n            noisy_char = word_chars[i + 1]\n            word_chars[i + 1] = char\n        elif typo in [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]:\n            noisy_char = \"\"\n        elif typo in [Typo.ADD_SPELLING_SYMBOL, Typo.ADD_SPACE, Typo.ADD_PUNCTUATION, Typo.ADD_CHAR]:\n            noisy_char = f\"{char}{char}\"\n        else:  # No typo\n            noisy_char = char\n\n        noisy_word += noisy_char\n        if typo is not None:\n            typos.append(typo)\n\n    return noisy_word, typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._fuzzy_type","title":"_fuzzy_type(word, error_free=False)","text":"

Method adding fuzzy typing.

This method takes a string (potentially already noisy from other types of typos), and fuzzy-types it : simulating a user on a soft-keyboard. This \"fat-finger syndrome\" is simulated using two Gaussian distributions, one for each axis (x, y). This method also returns the generated keystrokes (positions on the keyboard), but only for the default keyboard (ID = 0). Keystrokes from other keyboards are set to None.

Parameters:

Name Type Description Default word str

String to fuzzy-type.

required error_free bool

If set to True, don't introduce typo. Defaults to False.

False

Returns:

Type Description List[Optional[Tuple[float, float]]]

List of keystrokes.

str

Fuzzy string (corresponding to the keystrokes).

List[Typo]

List of typos introduced.

Source code in kebbie/noise_model.py
def _fuzzy_type(\n    self, word: str, error_free: bool = False\n) -> Tuple[List[Optional[Tuple[float, float]]], str, List[Typo]]:\n    \"\"\"Method adding fuzzy typing.\n\n    This method takes a string (potentially already noisy from other type\n    of typos), and fuzzy-type it : simulate a user on a soft-keyboard.\n    This \"fat-finger syndrom\" is simulated using two Gaussian\n    distributions, one for each axis (x, y).\n    This method also returns the generated keystrokes (positions on the\n    keyboard), but only for the default keyboard (ID = 0). Keystrokes from\n    other keyboard are set to None.\n\n    Args:\n        word (str): String to fuzzy-type.\n        error_free (bool): If set to True, don't introduce typo. Defaults\n            to False.\n\n    Returns:\n        List of keystrokes.\n        Fuzzy string (corresponding to the keystrokes).\n        List of typos introduced.\n    \"\"\"\n    fuzzy_word = \"\"\n    keystrokes = []\n    typos = []\n\n    # Type word character by character\n    for char in word:\n        try:\n            width, height, x_center, y_center, klayer_id = self.klayout.get_key_info(char)\n        except KeyError:\n            # This character doesn't exist on the current keyboard\n            # Just type it without introducing typo, like if the user copy-pasted it\n            keystrokes.append(None)\n            fuzzy_word += char\n            continue\n\n        # Sample a keystroke for this character\n        # Note that we don't generate typos for characters outside of the default keyboard\n        if error_free or klayer_id != 0:\n            keystroke = (x_center, y_center)\n        else:\n            # Compute mu and sigma for the Normal distribution\n            x_mu = x_center + self.x_offset\n            y_mu = y_center + self.y_offset\n            x_sigma = (width / 2) / self.x_ratio\n            y_sigma = (height / 2) / self.y_ratio\n\n            # Sample a position (x and y)\n            keystroke = 
(random.gauss(x_mu, x_sigma), random.gauss(y_mu, y_sigma))\n\n        # Convert it back to a character, to see where we tapped\n        fuzzy_char = self.klayout.get_key(keystroke, klayer_id)\n\n        # Save it (save the keystroke only if part of the default keyboard)\n        keystrokes.append(keystroke if klayer_id == 0 else None)\n        fuzzy_word += fuzzy_char\n        if fuzzy_char != char:\n            typos.append(Typo.SUBSTITUTE_CHAR)\n\n    return keystrokes, fuzzy_word, typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._is_correctable","title":"_is_correctable(word)","text":"

Method returning True if we expect the given word to be corrected upon typo introduction, False otherwise.

This is necessary to ensure we don't introduce typos in words that can't be corrected, because if we do, it will be counted as error.

For now, the following are considered non-correctable : * Words that don't contain any letter (from the Unicode standard)

Parameters:

Name Type Description Default word str

Word to classify as correctable or not.

required

Returns:

Type Description bool

True if the word is correctable (and therefore we can introduce

bool

typo), False otherwise.

Source code in kebbie/noise_model.py
def _is_correctable(self, word: str) -> bool:\n    \"\"\"Method returning True if we expect the given word to be corrected\n    upon typo introduction, False otherwise.\n\n    This is necessary to ensure we don't introduce typos in words that\n    can't be corrected, because if we do, it will be counted as error.\n\n    For now, are considered non-correctable :\n     * Words that don't contains any letter (from Unicode standard)\n\n    Args:\n        word (str): Word to classify as correctable or not.\n\n    Returns:\n        True if the word is correctable (and therefore we can introduce\n        typo), False otherwise.\n    \"\"\"\n    # Use the Unicode category `L` (see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category)\n    return not bool(re.match(r\"^[^\\pL]+$\", word))\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._get_common_typos","title":"_get_common_typos()","text":"

Retrieve the list (if it exists) of plausible common typos to use when introducing typos.

Returns:

Type Description Dict[str, List[str]]

Dictionary where the keys are the correct words and the values are the associated possible typos for this word.

Source code in kebbie/noise_model.py
def _get_common_typos(self) -> Dict[str, List[str]]:\n    \"\"\"Retrieve the list (if it exists) of plausible common typos to use\n    when introducing typos.\n\n    Returns:\n        Dictionary where the keys are the correct words and the values are\n            the associated possible typos for this word.\n    \"\"\"\n    plang = self.lang.split(\"-\")[0]\n    common_typos_cache_file = os.path.join(CACHE_DIR, f\"{plang}.json\")\n\n    # Try to access the cached common typos, and if it fails, it means we\n    # don't have it locally\n    try:\n        with open(common_typos_cache_file, \"r\") as f:\n            return json.load(f)\n    except FileNotFoundError:\n        pass\n\n    # File is not cached, download & process the common typos from online\n    os.makedirs(os.path.dirname(common_typos_cache_file), exist_ok=True)\n    typos = defaultdict(list)\n    if plang == \"en\":\n        response = requests.get(TWEET_TYPO_CORPUS_URL)\n        for line in response.text.strip().split(\"\\n\"):\n            typoed_word, correct_word, *_ = line.split(\"\\t\")\n            typos[correct_word].append(typoed_word)\n    else:\n        return {}\n\n    # Save the retrieved typos in cache\n    with open(common_typos_cache_file, \"w\") as f:\n        json.dump(typos, f, indent=4)\n\n    return typos\n
"},{"location":"internals/#oraclepy","title":"oracle.py","text":"

Module defining the Oracle class, which is the class taking care of iterating the dataset, introducing typos using the noise model, and querying the Corrector to correct these typos. Then the scorer is used to compute metrics about the performance, and the results are returned.

"},{"location":"internals/#kebbie.oracle.Oracle","title":"Oracle","text":"

Class that takes care of testing a Corrector. It basically gets clean text data, adds noise to it, sends the noisy data to the Corrector, and scores its output.

This class spawns multiple processes to decrease runtime.

Parameters:

Name Type Description Default lang str

Language used.

required test_data Dict[str, List[str]]

List of clean sentences for each domain.

required custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

required track_mistakes bool

Set to True for tracking the most common mistakes. Most common mistakes are added to the results dictionary.

required n_most_common_mistakes int

If track_mistakes is set to True, the top X mistakes to record.

required beta float

Beta to use for computing the F-beta score.

required Source code in kebbie/oracle.py
class Oracle:\n    \"\"\"Class that takes care of testing a Corrector. It basically gets clean\n    text data, adds noise to it, send the noisy data to the Corrector, and\n    scores its output.\n\n    This class spawn multiple processes to decrease runtime.\n\n    Args:\n        lang (str): Language used.\n        test_data (Dict[str, List[str]]): List of clean sentences for each\n            domain.\n        custom_keyboard (Dict): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        track_mistakes (bool): Set to `True` for tracking the most\n            common mistakes. Most common mistakes are added to the results\n            dictionary.\n        n_most_common_mistakes (int): If `track_mistakes` is set to\n            `True`, the top X mistakes to record.\n        beta (float): Beta to use for computing the F-beta score.\n    \"\"\"\n\n    def __init__(\n        self,\n        lang: str,\n        test_data: Dict[str, List[str]],\n        custom_keyboard: Dict,\n        track_mistakes: bool,\n        n_most_common_mistakes: int,\n        beta: float,\n    ) -> None:\n        super().__init__()\n\n        self.lang = lang\n        self.data = test_data\n        self.custom_keyboard = custom_keyboard\n        self.track_mistakes = track_mistakes\n        self.n_most_common_mistakes = n_most_common_mistakes\n        self.beta = beta\n\n    def test(self, corrector: Union[Corrector, List[Corrector]], n_proc: Optional[int], seed: int) -> Dict:\n        \"\"\"Main method, it tests the given Corrector, and returns results as a\n        dictionary.\n\n        This method spawn multiple processes to decrease runtime.\n\n        Args:\n            corrector (Union[Corrector, List[Corrector]]): Corrector to test.\n                If a list of Corrector is given, the argument `n_proc` is\n                ignored, and one corrector is assigned for each process.\n            n_proc 
(Optional[int]): Number of processes to use. If `None`,\n                `os.cpu_count()` is used.\n            seed (int): Seed to use for running the tests.\n\n        Returns:\n            Results formatted in a dictionary.\n        \"\"\"\n        # Initialize a global Scorer here, that will gather counts across processes\n        scorer = Scorer(domains=self.data.keys(), track_mistakes=self.track_mistakes)\n\n        # For multiprocessing\n        n_proc = n_proc if n_proc is not None else os.cpu_count()\n        d_size = sum(len(d) for d in self.data.values())\n\n        # Create the corrector for each process\n        proc_correctors = mp.Queue()\n        if isinstance(corrector, Corrector):\n            for _ in range(n_proc):\n                proc_correctors.put(corrector)\n        else:\n            # If we already have a list of correctors, assign one for each process\n            n_proc = len(corrector)\n            for c in corrector:\n                proc_correctors.put(c)\n\n        with mp.Pool(\n            processes=n_proc,\n            initializer=init_tester,\n            initargs=(tester, self.lang, self.custom_keyboard, proc_correctors, seed, self.track_mistakes),\n        ) as pool, tqdm(total=d_size) as pbar:\n            # Test data is made of several domain, where each domain contains a list of sentences\n            for domain, sentence_list in self.data.items():\n                chunk_size = max(min(CHUNK_SIZE, len(sentence_list) // n_proc), 1)\n                for scr in pool.imap_unordered(tester, sentence_list, chunksize=chunk_size):\n                    scr.set_domain(domain)\n                    scorer.add(scr)\n                    pbar.update(1)\n\n        # Retrieve the results\n        results = scorer.score(beta=self.beta)\n\n        # Then potentially add the most common mistakes\n        if self.track_mistakes:\n            mistakes = {}\n            for task in [\"nwp\", \"acp\", \"acr\"]:\n                task_name = 
{\"nwp\": \"next_word_prediction\", \"acp\": \"auto_completion\", \"acr\": \"auto_correction\"}[task]\n\n                m_count = getattr(scorer, f\"{task}_mistakes\")\n\n                mistakes[task_name] = [(\"Count\", \"Expected\", \"Predictions\", \"Context\")]\n                for m, c in m_count.most_common(self.n_most_common_mistakes):\n                    mistakes[task_name].append((c, m.actual, f\"[{', '.join(m.preds)}]\", m.context))\n\n            results[\"most_common_mistakes\"] = mistakes\n\n        return results\n
"},{"location":"internals/#kebbie.oracle.Oracle.test","title":"test(corrector, n_proc, seed)","text":"

Main method, it tests the given Corrector, and returns results as a dictionary.

This method spawns multiple processes to decrease runtime.

Parameters:

Name Type Description Default corrector Union[Corrector, List[Corrector]]

Corrector to test. If a list of Corrector is given, the argument n_proc is ignored, and one corrector is assigned for each process.

required n_proc Optional[int]

Number of processes to use. If None, os.cpu_count() is used.

required seed int

Seed to use for running the tests.

required

Returns:

Type Description Dict

Results formatted in a dictionary.

Source code in kebbie/oracle.py
def test(self, corrector: Union[Corrector, List[Corrector]], n_proc: Optional[int], seed: int) -> Dict:\n    \"\"\"Main method, it tests the given Corrector, and returns results as a\n    dictionary.\n\n    This method spawn multiple processes to decrease runtime.\n\n    Args:\n        corrector (Union[Corrector, List[Corrector]]): Corrector to test.\n            If a list of Corrector is given, the argument `n_proc` is\n            ignored, and one corrector is assigned for each process.\n        n_proc (Optional[int]): Number of processes to use. If `None`,\n            `os.cpu_count()` is used.\n        seed (int): Seed to use for running the tests.\n\n    Returns:\n        Results formatted in a dictionary.\n    \"\"\"\n    # Initialize a global Scorer here, that will gather counts across processes\n    scorer = Scorer(domains=self.data.keys(), track_mistakes=self.track_mistakes)\n\n    # For multiprocessing\n    n_proc = n_proc if n_proc is not None else os.cpu_count()\n    d_size = sum(len(d) for d in self.data.values())\n\n    # Create the corrector for each process\n    proc_correctors = mp.Queue()\n    if isinstance(corrector, Corrector):\n        for _ in range(n_proc):\n            proc_correctors.put(corrector)\n    else:\n        # If we already have a list of correctors, assign one for each process\n        n_proc = len(corrector)\n        for c in corrector:\n            proc_correctors.put(c)\n\n    with mp.Pool(\n        processes=n_proc,\n        initializer=init_tester,\n        initargs=(tester, self.lang, self.custom_keyboard, proc_correctors, seed, self.track_mistakes),\n    ) as pool, tqdm(total=d_size) as pbar:\n        # Test data is made of several domain, where each domain contains a list of sentences\n        for domain, sentence_list in self.data.items():\n            chunk_size = max(min(CHUNK_SIZE, len(sentence_list) // n_proc), 1)\n            for scr in pool.imap_unordered(tester, sentence_list, chunksize=chunk_size):\n              
  scr.set_domain(domain)\n                scorer.add(scr)\n                pbar.update(1)\n\n    # Retrieve the results\n    results = scorer.score(beta=self.beta)\n\n    # Then potentially add the most common mistakes\n    if self.track_mistakes:\n        mistakes = {}\n        for task in [\"nwp\", \"acp\", \"acr\"]:\n            task_name = {\"nwp\": \"next_word_prediction\", \"acp\": \"auto_completion\", \"acr\": \"auto_correction\"}[task]\n\n            m_count = getattr(scorer, f\"{task}_mistakes\")\n\n            mistakes[task_name] = [(\"Count\", \"Expected\", \"Predictions\", \"Context\")]\n            for m, c in m_count.most_common(self.n_most_common_mistakes):\n                mistakes[task_name].append((c, m.actual, f\"[{', '.join(m.preds)}]\", m.context))\n\n        results[\"most_common_mistakes\"] = mistakes\n\n    return results\n
"},{"location":"internals/#kebbie.oracle.init_tester","title":"init_tester(fn, lang, custom_keyboard, correctors, seed, track_mistakes)","text":"

Function run at process initialization for Tester workers.

Each worker in a Pool will run this function when created. It will instantiate several things needed for testing the given corrector : * A Tokenizer to split sentences into words * A NoiseModel to introduce typos * A Corrector instance, which is the model we want to test

Parameters:

Name Type Description Default fn Callable

Main tester function (instanciated objects will be attached to this function).

required lang str

Language used.

required custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

required correctors Queue

Queue containing list of correctors to test. Each process will get the next corrector available in queue.

required seed int

Base seed to use.

required track_mistakes bool

Set to True for tracking the most common mistakes.

required Source code in kebbie/oracle.py
def init_tester(\n    fn: Callable, lang: str, custom_keyboard: Dict, correctors: mp.Queue, seed: int, track_mistakes: bool\n) -> None:\n    \"\"\"Function run at process initialization for Tester workers.\n\n    Each worker in a Pool will run this function when created. It will\n    instanciate several things needed for testing the given corrector :\n     * A Tokenizer to split sentences into words\n     * A NoiseModel to introduce typos\n     * A Corrector instance, which is the model we want to test\n\n    Args:\n        fn (Callable): Main tester function (instanciated objects will be\n            attached to this function).\n        lang (str): Language used.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        correctors (mp.Queue): Queue containing list of correctors to test.\n            Each process will get the next corrector available in queue.\n        seed (int): Base seed to use.\n        track_mistakes (bool): Set to `True` for tracking the most common\n            mistakes.\n    \"\"\"\n    fn.tokenizer = BasicTokenizer()\n    fn.noisy = NoiseModel(lang, custom_keyboard=custom_keyboard)\n    fn.corrector = correctors.get()\n    fn.base_seed = seed\n    fn.track_mistakes = track_mistakes\n
"},{"location":"internals/#kebbie.oracle.tester","title":"tester(sentence)","text":"

Function to test a given sentence.

It uses the noise model to introduce typos word by word, runs the Corrector on various tasks (auto-completion, auto-correction, next-word prediction), and scores the results.

Parameters:

Name Type Description Default sentence str

Sentence to use as data for the test.

required

Returns:

Type Description Scorer

Scorer class with the prediction counts for this sentence.

Source code in kebbie/oracle.py
def tester(sentence: str) -> Scorer:\n    \"\"\"Function to test a given sentence.\n\n    It uses the noise model to introduce typos word by word, run the\n    Corrector on various tasks (auto-completion, auto-correction, next-word\n    prediction), and score the results.\n\n    Args:\n        sentence (str): Sentence to use as data for the test.\n\n    Returns:\n        Scorer class with the prediction counts for this sentence.\n    \"\"\"\n    # Set the seed for reproducibility, using the hash of the sentence\n    hsh = int(hashlib.sha256(sentence.encode(\"utf-8\")).hexdigest(), 16)\n    random.seed(tester.base_seed + hsh)\n    rnd_state = random.getstate()\n\n    # Tokenize the sentence into words\n    sentence = tester.tokenizer.preprocess(sentence)\n    words = tester.tokenizer.word_split(sentence)\n\n    context = \"\"\n    # Keep track for predictions counts with a local scorer, for this sentence\n    scorer = Scorer(domains=[None], track_mistakes=tester.track_mistakes)\n    while words and len(context) < MAX_CHAR_PER_SENTENCE:\n        # Before randomly generating typo, set the random state for determinism\n        random.setstate(rnd_state)\n\n        # It's slow to generate swipe gesture every sentence, so run it just sometimes\n        word_to_swipe = words[0]\n        swipe_gesture = tester.noisy.swipe(word_to_swipe) if sample(SWIPE_PROB) else None\n\n        # Generate noisy keystrokes for the next word(s)\n        keystrokes, typed_word, n_word_typed, typos = tester.noisy.type_till_space(words)\n\n        # Get the clean word(s), update the remaining words to type and get the next word\n        actual_word = \" \".join(words[:n_word_typed])\n        words = words[n_word_typed:]\n        next_word = words[0] if len(words) > 0 else None\n\n        # We are done with generating typo, save the random state for the next iteration\n        rnd_state = random.getstate()\n\n        if swipe_gesture:\n            # Call the swipe model\n            preds, 
memory, runtime = tester.corrector.profiled_resolve_swipe(context, swipe_gesture)\n            scorer.swp(word_to_swipe, preds, context=context, memory=memory, runtime=runtime)\n\n        # Call the model for auto-completion (for long enough words)\n        if len(typed_word) > 1 and len(actual_word) > 1:\n            partial_keystrokes, partial_word = sample_partial_word(keystrokes, typed_word, actual_word)\n            preds, memory, runtime = tester.corrector.profiled_auto_complete(context, partial_keystrokes, partial_word)\n            scorer.acp(actual_word, preds, partial_word=partial_word, context=context, memory=memory, runtime=runtime)\n\n        # Call the model for auto-correction\n        preds, memory, runtime = tester.corrector.profiled_auto_correct(context, keystrokes, typed_word)\n        scorer.acr(\n            actual_word, preds, typed_word=typed_word, context=context, typos=typos, memory=memory, runtime=runtime\n        )\n\n        # Update the context for the next iteration (input forcing)\n        context = tester.tokenizer.update_context(context, actual_word)\n\n        # Call the model for next-word prediction\n        if next_word:\n            preds, memory, runtime = tester.corrector.profiled_predict_next_word(context)\n            scorer.nwp(next_word, preds, context=context, memory=memory, runtime=runtime)\n\n    return scorer\n
"},{"location":"internals/#scorerpy","title":"scorer.py","text":"

Module implementing Scorer, a class that keeps track of how many errors the model is making, and outputs various corresponding metrics.

"},{"location":"internals/#kebbie.scorer.Count","title":"Count dataclass","text":"

Structure representing the most basic counts for a task.

It counts : * Number of correct predictions * Number of top3-correct predictions * Total number of predictions

Source code in kebbie/scorer.py
@dataclass\nclass Count:\n    \"\"\"Structure representing the most basic counts for a task.\n\n    It counts :\n    * Number of correct predictions\n    * Number of top3-correct predictions\n    * Total number of predictions\n    \"\"\"\n\n    correct: int = 0  # Number of times the first prediction was correct\n    correct_3: int = 0  # Number of times one of the top-3 predictions was correct\n    total: int = 0  # Total number of predictions\n\n    def __add__(self, count: Count) -> Count:\n        \"\"\"Merge two `Count` instance by adding their counts.\n\n        Args:\n            count (Count): Count instance to add.\n\n        Returns:\n            Merged Count.\n        \"\"\"\n        return Count(\n            correct=self.correct + count.correct,\n            correct_3=self.correct_3 + count.correct_3,\n            total=self.total + count.total,\n        )\n\n    def __mul__(self, proportion: float) -> Count:\n        \"\"\"Multiply the current `Count` instance by a given proportion.\n\n        Args:\n            proportion (float): Proportion to multiply by.\n\n        Returns:\n            Count with the right proportion.\n        \"\"\"\n        return Count(\n            correct=round(self.correct * proportion),\n            correct_3=round(self.correct_3 * proportion),\n            total=round(self.total * proportion),\n        )\n
"},{"location":"internals/#kebbie.scorer.Count.__add__","title":"__add__(count)","text":"

Merge two Count instances by adding their counts.

Parameters:

Name Type Description Default count Count

Count instance to add.

required

Returns:

Type Description Count

Merged Count.

Source code in kebbie/scorer.py
def __add__(self, count: Count) -> Count:\n    \"\"\"Merge two `Count` instance by adding their counts.\n\n    Args:\n        count (Count): Count instance to add.\n\n    Returns:\n        Merged Count.\n    \"\"\"\n    return Count(\n        correct=self.correct + count.correct,\n        correct_3=self.correct_3 + count.correct_3,\n        total=self.total + count.total,\n    )\n
"},{"location":"internals/#kebbie.scorer.Count.__mul__","title":"__mul__(proportion)","text":"

Multiply the current Count instance by a given proportion.

Parameters:

Name Type Description Default proportion float

Proportion to multiply by.

required

Returns:

Type Description Count

Count with the right proportion.

Source code in kebbie/scorer.py
def __mul__(self, proportion: float) -> Count:\n    \"\"\"Multiply the current `Count` instance by a given proportion.\n\n    Args:\n        proportion (float): Proportion to multiply by.\n\n    Returns:\n        Count with the right proportion.\n    \"\"\"\n    return Count(\n        correct=round(self.correct * proportion),\n        correct_3=round(self.correct_3 * proportion),\n        total=round(self.total * proportion),\n    )\n
"},{"location":"internals/#kebbie.scorer.Mistake","title":"Mistake dataclass","text":"

Structure representing a mistake (including the context of the mistake, the expected word and the predictions).

Source code in kebbie/scorer.py
@dataclass(eq=True, frozen=True)\nclass Mistake:\n    \"\"\"Structure representing a mistake (including the context of the mistake,\n    the expected word and the predictions).\n    \"\"\"\n\n    actual: str = field(compare=True)\n    preds: List[str] = field(compare=False)\n    context: str = field(compare=False)\n
"},{"location":"internals/#kebbie.scorer.Scorer","title":"Scorer","text":"

Class keeping track of the predictions and how correct they are, but also computing the associated score for each task after the end of the test.

Parameters:

Name Type Description Default domains List[str]

The list of domains in the dataset. The Scorer keeps track of the score for each domain, so that we can spot discrepancies between domain, if any.

required human_readable bool

If set to False, performance metrics (memory, runtime) are kept in their raw, numeral form. If set to True, these are converted to a human readable string.

True track_mistakes bool

Set to True for tracking the most common mistakes.

False Source code in kebbie/scorer.py
class Scorer:\n    \"\"\"Class keeping track of the predictions and how correct they are, but\n    also computing the associated score for each task after the end of test.\n\n    Args:\n        domains (List[str]): The list of domains in the dataset. The Scorer\n            keeps track of the score for each domain, so that we can spot\n            discrepancies between domain, if any.\n        human_readable (bool, optional): If set to `False`, performance metrics\n            (memory, runtime) are kept in their raw, numeral form. If set to\n            `True`, these are converted to a human readable string.\n        track_mistakes (bool, optional): Set to `True` for tracking the most\n            common mistakes.\n    \"\"\"\n\n    def __init__(self, domains: List[str], human_readable: bool = True, track_mistakes: bool = False) -> None:\n        self.human_readable = human_readable\n\n        # For each task, create a dictionary of Counts\n        # Each task has a different structure :\n\n        # Next-word prediction : [domain] -> counts\n        self.nwp_c = dd_x_layers(1)\n\n        # Autocompletion : [domain] -> [typo/no_typo] -> [word_completion_rate] -> counts\n        self.acp_c = dd_x_layers(3)\n\n        # Autocorrection : [domain] -> [typo type/number of typo] -> counts\n        self.acr_c = dd_x_layers(2)\n\n        # Swipe resolution : [domain] -> counts\n        self.swp_c = dd_x_layers(1)\n\n        # Make sure we track each domain (create a 0-Count for each domain)\n        for d in domains:\n            _ = self.nwp_c[d], self.acp_c[d][WITH_TYPO][0], self.acr_c[d][None], self.swp_c[d]\n\n        # Also keep track of memories & runtimes\n        self.nwp_memories = []\n        self.acp_memories = []\n        self.acr_memories = []\n        self.swp_memories = []\n        self.nwp_runtimes = []\n        self.acp_runtimes = []\n        self.acr_runtimes = []\n        self.swp_runtimes = []\n\n        # Optionally track common mistakes\n        
self.track_mistakes = track_mistakes\n        self.nwp_mistakes = Counter()\n        self.acp_mistakes = Counter()\n        self.acr_mistakes = Counter()\n        self.swp_mistakes = Counter()\n\n    def add(self, scorer) -> None:\n        \"\"\"Method to update the current Scorer with the counts from another\n        Scorer.\n\n        Args:\n            scorer (Scorer): Scorer to add.\n        \"\"\"\n\n        def update(d1, d2):\n            for k in d2:\n                if isinstance(d2[k], Count):\n                    d1[k] += d2[k]\n                else:\n                    update(d1[k], d2[k])\n\n        update(self.nwp_c, scorer.nwp_c)\n        update(self.acp_c, scorer.acp_c)\n        update(self.acr_c, scorer.acr_c)\n        update(self.swp_c, scorer.swp_c)\n        self.nwp_memories.extend(scorer.nwp_memories)\n        self.acp_memories.extend(scorer.acp_memories)\n        self.acr_memories.extend(scorer.acr_memories)\n        self.swp_memories.extend(scorer.swp_memories)\n        self.nwp_runtimes.extend(scorer.nwp_runtimes)\n        self.acp_runtimes.extend(scorer.acp_runtimes)\n        self.acr_runtimes.extend(scorer.acr_runtimes)\n        self.swp_runtimes.extend(scorer.swp_runtimes)\n        self.nwp_mistakes.update(scorer.nwp_mistakes)\n        self.acp_mistakes.update(scorer.acp_mistakes)\n        self.acr_mistakes.update(scorer.acr_mistakes)\n        self.swp_mistakes.update(scorer.swp_mistakes)\n\n    def nwp(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        context: str,\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the next-word prediction\n        task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            context (str): The context (previous words in the sentence).\n            memory 
(int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.nwp_memories.append(memory)\n        if runtime >= 0:\n            self.nwp_runtimes.append(runtime)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.nwp_c[domain].correct += 1\n        if true_word in predicted_words[:3]:\n            self.nwp_c[domain].correct_3 += 1\n        else:\n            # If the word is not in the top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.nwp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n        self.nwp_c[domain].total += 1\n\n    def acp(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        partial_word: str,\n        context: str,\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the auto-completion task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            partial_word (str): The input sent to the model (only part of the\n                word to predict, with potential typos).\n            context (str): The context (previous words in the sentence).\n            memory (int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.acp_memories.append(memory)\n        if runtime >= 0:\n            self.acp_runtimes.append(runtime)\n\n        # Check if a typo was 
introduced or not\n        has_typo = WITHOUT_TYPO if true_word.startswith(partial_word) else WITH_TYPO\n\n        # Compute the completion rate\n        completion_rate = round(len(partial_word) / len(true_word), 2)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.acp_c[domain][has_typo][completion_rate].correct += 1\n        if true_word in predicted_words[:3]:\n            self.acp_c[domain][has_typo][completion_rate].correct_3 += 1\n        else:\n            # If the word is not in the top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.acp_mistakes.update(\n                    [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{partial_word}\")]\n                )\n\n        self.acp_c[domain][has_typo][completion_rate].total += 1\n\n    def acr(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        typed_word: str,\n        context: str,\n        typos: List[Typo],\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the auto-correction task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            typed_word (str): The word typed, containing potential typos.\n            context (str): The context (previous words in the sentence).\n            typos (List[Typo]): List of typos introduced.\n            memory (int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.acr_memories.append(memory)\n        if runtime >= 0:\n            self.acr_runtimes.append(runtime)\n\n        # Get the 
type of typo\n        if not typos:\n            typo_type = None\n        elif len(typos) == 1:\n            typo_type = typos[0]\n        else:\n            typo_type = len(typos)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.acr_c[domain][typo_type].correct += 1\n        if true_word in predicted_words[:3]:\n            self.acr_c[domain][typo_type].correct_3 += 1\n        else:\n            # If the word is not in the top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.acr_mistakes.update(\n                    [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{typed_word}\")]\n                )\n\n        self.acr_c[domain][typo_type].total += 1\n\n    def swp(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        context: str,\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the swipe resolution task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            context (str): The context (previous words in the sentence).\n            memory (int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.swp_memories.append(memory)\n        if runtime >= 0:\n            self.swp_runtimes.append(runtime)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.swp_c[domain].correct += 1\n        if true_word in predicted_words[:3]:\n            self.swp_c[domain].correct_3 += 1\n        else:\n            # If the word is not in the 
top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.swp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n        self.swp_c[domain].total += 1\n\n    def set_domain(self, domain: str) -> None:\n        \"\"\"Method setting the domain for the scores associated with no domain.\n\n        To make it easier to score a single sentence, it's possible to call the\n        scorer without a domain (see signature of `nwp()`, `acp()`, `acr()`).\n        In this case the scores are associated to no domain (`None` key).\n        This method allows the user to set the domain name for these scores\n        with no domain (effectively moving the `None` domain scores to the\n        given domain name).\n\n        Note:\n            If some scores were already linked to the given domain, these\n            scores will be erased (replaced by the scores of the `None`\n            domain).\n\n        Args:\n            domain (str): Domain name to associate the scores to.\n        \"\"\"\n        if None in self.nwp_c:\n            self.nwp_c[domain] = self.nwp_c.pop(None)\n        if None in self.acp_c:\n            self.acp_c[domain] = self.acp_c.pop(None)\n        if None in self.acr_c:\n            self.acr_c[domain] = self.acr_c.pop(None)\n        if None in self.swp_c:\n            self.swp_c[domain] = self.swp_c.pop(None)\n\n    def _score_accuracy(self, c: Count) -> Dict:\n        \"\"\"Helper method to compute the accuracy given a prediction count.\n\n        This method return a dictionary with 3 metrics :\n         * Accuracy\n         * Top3 accuracy\n         * Total number of predictions\n\n        Args:\n            c (Count): Count object to use to compute the accuracy.\n\n        Returns:\n            Dictionary with the computed metrics.\n        \"\"\"\n        return {\n            \"accuracy\": round_to_n(c.correct / c.total) if c.total != 0 else 0,\n            
\"top3_accuracy\": round_to_n(c.correct_3 / c.total) if c.total != 0 else 0,\n            \"n\": c.total,\n        }\n\n    def _score_precision_recall(self, no_typo_c: Count, typo_c: Count, beta: float) -> Dict:\n        \"\"\"Helper method to compute the precision and recall for\n        auto-correction.\n\n        This method return a dictionary with several metrics :\n         * Accuracy\n         * Precision\n         * Recall\n         * F-score\n         * Top3 accuracy\n         * Top3 precision\n         * Top3 recall\n         * Top3 F-score\n         * Number of predictions with a typo\n         * Total number of predictions\n\n        For auto-correction, we need 2 Count objects : the counts of typos, and\n        the counts of non-typo (to compute the True Negative and False Positive\n        metrics).\n\n        Args:\n            no_typo_c (Count): Count object for the predictions where no typo\n                were added.\n            typo_c (Count): Count object for the predictions where typos were\n                added.\n            beta (float): Beta to use for computing the F-beta score.\n\n        Returns:\n            Dictionary with the computed metrics.\n        \"\"\"\n        # The first step is to divide the counts into TN, FP, TP, FN\n        tn = no_typo_c.correct\n        fp = no_typo_c.total - no_typo_c.correct\n        tp = typo_c.correct\n        fn = typo_c.total - typo_c.correct\n\n        tn_3 = no_typo_c.correct_3\n        fp_3 = no_typo_c.total - no_typo_c.correct_3\n        tp_3 = typo_c.correct_3\n        fn_3 = typo_c.total - typo_c.correct_3\n\n        # Then we compute the metrics\n        p = precision(tp=tp, fp=fp)\n        r = recall(tp=tp, fn=fn)\n\n        p_3 = precision(tp=tp_3, fp=fp_3)\n        r_3 = recall(tp=tp_3, fn=fn_3)\n\n        return {\n            \"accuracy\": round_to_n(accuracy(tp=tp, tn=tn, fp=fp, fn=fn)),\n            \"precision\": round_to_n(p),\n            \"recall\": round_to_n(r),\n           
 \"fscore\": round_to_n(fbeta(precision=p, recall=r, beta=beta)),\n            \"top3_accuracy\": round_to_n(accuracy(tp=tp_3, tn=tn_3, fp=fp_3, fn=fn_3)),\n            \"top3_precision\": round_to_n(p_3),\n            \"top3_recall\": round_to_n(r_3),\n            \"top3_fscore\": round_to_n(fbeta(precision=p_3, recall=r_3, beta=beta)),\n            \"n_typo\": typo_c.total,\n            \"n\": no_typo_c.total + typo_c.total,\n        }\n\n    def _score_performances(self, memories: List[int], runtimes: List[int]) -> Dict:\n        \"\"\"Helper method to compute metrics related to the memory & runtime.\n\n        This method returns a dictionary with several metrics :\n         * The mean memory consumption\n         * The min memory consumption\n         * The max memory consumption\n         * The mean running time\n         * The fastest running time\n         * The slowest running time\n\n        Args:\n            memories (List[int]): List of memories consumptions for a\n                specific operation.\n            runtimes (List[int]): List of runtimes for a specific operation.\n\n        Returns:\n            Dictionary with the computed metrics.\n        \"\"\"\n        perf = {\n            \"mean_memory\": stats.mean(memories) if memories else 0,\n            \"min_memory\": min(memories) if memories else 0,\n            \"max_memory\": max(memories) if memories else 0,\n            \"mean_runtime\": stats.mean(runtimes) if runtimes else 0,\n            \"fastest_runtime\": min(runtimes) if runtimes else 0,\n            \"slowest_runtime\": max(runtimes) if runtimes else 0,\n        }\n\n        if self.human_readable:\n            perf = {\n                name: human_readable_memory(x) if name.endswith(\"memory\") else human_readable_runtime(x)\n                for name, x in perf.items()\n            }\n\n        return perf\n\n    def score(self, beta: float = DEFAULT_BETA) -> Dict:  # noqa: C901\n        \"\"\"Method that computes the final 
scores (as well as some alternative\n        metrics that can bring insight in the capabilities of the model), and\n        output these in an organized dictionary.\n\n        Args:\n            beta (float, optional): Beta to use for computing the F-beta score.\n\n        Returns:\n            Dictionary containing the computed scores and metrics for the\n            model tested.\n        \"\"\"\n        # --- Next-word prediction ---\n        # Group scores by domain\n        per = defaultdict(Count)\n        for domain, c in self.nwp_c.items():\n            per[domain] += c\n        total_c = sum(per.values(), Count())\n        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n        # Task results\n        nwp = {\n            \"score\": self._score_accuracy(total_c),\n            \"per_domain\": per_domain,\n            \"performances\": self._score_performances(self.nwp_memories, self.nwp_runtimes),\n        }\n\n        # --- Auto-completion ---\n        # Group scores by domain\n        per = defaultdict(Count)\n        for domain, d1 in self.acp_c.items():\n            for has_typo, d2 in d1.items():\n                for compl_rate, c in d2.items():\n                    per[domain] += c\n        total_c = sum(per.values(), Count())\n        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n        # Group scores by completion rate\n        per = defaultdict(Count)\n        for domain, d1 in self.acp_c.items():\n            for has_typo, d2 in d1.items():\n                for compl_rate, c in d2.items():\n                    per[compl_rate] += c\n        per_compl_rate = {\n            \"<25%\": self._score_accuracy(sum((c for k, c in per.items() if k < 0.25), Count())),\n            \"25%~50%\": self._score_accuracy(sum((c for k, c in per.items() if 0.25 <= k < 0.5), Count())),\n            \"50%~75%\": self._score_accuracy(sum((c for k, c in per.items() if 0.5 <= k < 0.75), Count())),\n            \">75%\": 
self._score_accuracy(sum((c for k, c in per.items() if 0.75 <= k), Count())),\n        }\n\n        # Group scores by with_typo / without_typo\n        per = defaultdict(Count)\n        for domain, d1 in self.acp_c.items():\n            for has_typo, d2 in d1.items():\n                for compl_rate, c in d2.items():\n                    per[has_typo] += c\n        per_other = {k: self._score_accuracy(per[k]) for k in [WITHOUT_TYPO, WITH_TYPO]}\n\n        # Task results\n        acp = {\n            \"score\": self._score_accuracy(total_c),\n            \"per_domain\": per_domain,\n            \"per_completion_rate\": per_compl_rate,\n            \"per_other\": per_other,\n            \"performances\": self._score_performances(self.acp_memories, self.acp_runtimes),\n        }\n\n        # --- Auto-correction ---\n        # Group scores by domain\n        no_typo_per, typo_per = defaultdict(Count), defaultdict(Count)\n        for domain, d1 in self.acr_c.items():\n            for typo, c in d1.items():\n                if typo is None:\n                    no_typo_per[domain] += c\n                else:\n                    typo_per[domain] += c\n        no_typo_total_c = sum(no_typo_per.values(), Count())\n        typo_total_c = sum(typo_per.values(), Count())\n        per_domain = {k: self._score_precision_recall(no_typo_per[k], typo_per[k], beta=beta) for k in no_typo_per}\n\n        # Group scores by typo type\n        no_typo_c, typo_per = Count(), defaultdict(Count)\n        for domain, d1 in self.acr_c.items():\n            for typo, c in d1.items():\n                if typo is None:\n                    no_typo_c += c\n                else:\n                    typo_per[typo] += c\n        # Divide the total count of no-typo into each type of typos with the right proportions\n        no_typo_per = defaultdict(Count, {k: no_typo_c * (c.total / typo_total_c.total) for k, c in typo_per.items()})\n        per_typo_type = {t.name: 
self._score_precision_recall(no_typo_per[t], typo_per[t], beta=beta) for t in Typo}\n        per_n_typo = {\n            \"1\": self._score_precision_recall(\n                sum((c for k, c in no_typo_per.items() if isinstance(k, Typo)), Count()),\n                sum((c for k, c in typo_per.items() if isinstance(k, Typo)), Count()),\n                beta=beta,\n            ),\n            \"2\": self._score_precision_recall(no_typo_per[2], typo_per[2], beta=beta),\n            \"3+\": self._score_precision_recall(\n                sum((c for k, c in no_typo_per.items() if isinstance(k, int) and k > 2), Count()),\n                sum((c for k, c in typo_per.items() if isinstance(k, int) and k > 2), Count()),\n                beta=beta,\n            ),\n        }\n\n        # Task results\n        acr = {\n            \"score\": self._score_precision_recall(no_typo_total_c, typo_total_c, beta=beta),\n            \"per_domain\": per_domain,\n            \"per_typo_type\": per_typo_type,\n            \"per_number_of_typos\": per_n_typo,\n            \"performances\": self._score_performances(self.acr_memories, self.acr_runtimes),\n        }\n\n        # --- Swipe resolution ---\n        # Group scores by domain\n        per = defaultdict(Count)\n        for domain, c in self.swp_c.items():\n            per[domain] += c\n        total_c = sum(per.values(), Count())\n        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n        # Task results\n        swp = {\n            \"score\": self._score_accuracy(total_c),\n            \"per_domain\": per_domain,\n            \"performances\": self._score_performances(self.swp_memories, self.swp_runtimes),\n        }\n\n        # Final results\n        results = {\n            \"next_word_prediction\": nwp,\n            \"auto_completion\": acp,\n            \"auto_correction\": acr,\n            \"swipe_resolution\": swp,\n        }\n\n        # Add the overall score\n        results[\"overall_score\"] = 
one_score(results)\n\n        return results\n
"},{"location":"internals/#kebbie.scorer.Scorer.add","title":"add(scorer)","text":"

Method to update the current Scorer with the counts from another Scorer.

Parameters:

Name Type Description Default scorer Scorer

Scorer to add.

required Source code in kebbie/scorer.py
def add(self, scorer) -> None:\n    \"\"\"Method to update the current Scorer with the counts from another\n    Scorer.\n\n    Args:\n        scorer (Scorer): Scorer to add.\n    \"\"\"\n\n    def update(d1, d2):\n        for k in d2:\n            if isinstance(d2[k], Count):\n                d1[k] += d2[k]\n            else:\n                update(d1[k], d2[k])\n\n    update(self.nwp_c, scorer.nwp_c)\n    update(self.acp_c, scorer.acp_c)\n    update(self.acr_c, scorer.acr_c)\n    update(self.swp_c, scorer.swp_c)\n    self.nwp_memories.extend(scorer.nwp_memories)\n    self.acp_memories.extend(scorer.acp_memories)\n    self.acr_memories.extend(scorer.acr_memories)\n    self.swp_memories.extend(scorer.swp_memories)\n    self.nwp_runtimes.extend(scorer.nwp_runtimes)\n    self.acp_runtimes.extend(scorer.acp_runtimes)\n    self.acr_runtimes.extend(scorer.acr_runtimes)\n    self.swp_runtimes.extend(scorer.swp_runtimes)\n    self.nwp_mistakes.update(scorer.nwp_mistakes)\n    self.acp_mistakes.update(scorer.acp_mistakes)\n    self.acr_mistakes.update(scorer.acr_mistakes)\n    self.swp_mistakes.update(scorer.swp_mistakes)\n
"},{"location":"internals/#kebbie.scorer.Scorer.nwp","title":"nwp(true_word, predicted_words, context, memory, runtime, domain=None)","text":"

Method used to record a prediction for the next-word prediction task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required context str

The context (previous words in the sentence).

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def nwp(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    context: str,\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the next-word prediction\n    task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        context (str): The context (previous words in the sentence).\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.nwp_memories.append(memory)\n    if runtime >= 0:\n        self.nwp_runtimes.append(runtime)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.nwp_c[domain].correct += 1\n    if true_word in predicted_words[:3]:\n        self.nwp_c[domain].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.nwp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n    self.nwp_c[domain].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.acp","title":"acp(true_word, predicted_words, partial_word, context, memory, runtime, domain=None)","text":"

Method used to record a prediction for the auto-completion task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required partial_word str

The input sent to the model (only part of the word to predict, with potential typos).

required context str

The context (previous words in the sentence).

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def acp(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    partial_word: str,\n    context: str,\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the auto-completion task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        partial_word (str): The input sent to the model (only part of the\n            word to predict, with potential typos).\n        context (str): The context (previous words in the sentence).\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.acp_memories.append(memory)\n    if runtime >= 0:\n        self.acp_runtimes.append(runtime)\n\n    # Check if a typo was introduced or not\n    has_typo = WITHOUT_TYPO if true_word.startswith(partial_word) else WITH_TYPO\n\n    # Compute the completion rate\n    completion_rate = round(len(partial_word) / len(true_word), 2)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.acp_c[domain][has_typo][completion_rate].correct += 1\n    if true_word in predicted_words[:3]:\n        self.acp_c[domain][has_typo][completion_rate].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.acp_mistakes.update(\n                [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{partial_word}\")]\n            )\n\n    self.acp_c[domain][has_typo][completion_rate].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.acr","title":"acr(true_word, predicted_words, typed_word, context, typos, memory, runtime, domain=None)","text":"

Method used to record a prediction for the auto-correction task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required typed_word str

The word typed, containing potential typos.

required context str

The context (previous words in the sentence).

required typos List[Typo]

List of typos introduced.

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def acr(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    typed_word: str,\n    context: str,\n    typos: List[Typo],\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the auto-correction task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        typed_word (str): The word typed, containing potential typos.\n        context (str): The context (previous words in the sentence).\n        typos (List[Typo]): List of typos introduced.\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.acr_memories.append(memory)\n    if runtime >= 0:\n        self.acr_runtimes.append(runtime)\n\n    # Get the type of typo\n    if not typos:\n        typo_type = None\n    elif len(typos) == 1:\n        typo_type = typos[0]\n    else:\n        typo_type = len(typos)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.acr_c[domain][typo_type].correct += 1\n    if true_word in predicted_words[:3]:\n        self.acr_c[domain][typo_type].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.acr_mistakes.update(\n                [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{typed_word}\")]\n            )\n\n    self.acr_c[domain][typo_type].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.swp","title":"swp(true_word, predicted_words, context, memory, runtime, domain=None)","text":"

Method used to record a prediction for the swipe resolution task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required context str

The context (previous words in the sentence).

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def swp(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    context: str,\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the swipe resolution task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        context (str): The context (previous words in the sentence).\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.swp_memories.append(memory)\n    if runtime >= 0:\n        self.swp_runtimes.append(runtime)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.swp_c[domain].correct += 1\n    if true_word in predicted_words[:3]:\n        self.swp_c[domain].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.swp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n    self.swp_c[domain].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.set_domain","title":"set_domain(domain)","text":"

Method setting the domain for the scores associated with no domain.

To make it easier to score a single sentence, it's possible to call the scorer without a domain (see signature of nwp(), acp(), acr()). In this case the scores are associated to no domain (None key). This method allows the user to set the domain name for these scores with no domain (effectively moving the None domain scores to the given domain name).

Note

If some scores were already linked to the given domain, these scores will be erased (replaced by the scores of the None domain).

Parameters:

Name Type Description Default domain str

Domain name to associate the scores to.

required Source code in kebbie/scorer.py
def set_domain(self, domain: str) -> None:\n    \"\"\"Method setting the domain for the scores associated with no domain.\n\n    To make it easier to score a single sentence, it's possible to call the\n    scorer without a domain (see signature of `nwp()`, `acp()`, `acr()`).\n    In this case the scores are associated to no domain (`None` key).\n    This method allows the user to set the domain name for these scores\n    with no domain (effectively moving the `None` domain scores to the\n    given domain name).\n\n    Note:\n        If some scores were already linked to the given domain, these\n        scores will be erased (replaced by the scores of the `None`\n        domain).\n\n    Args:\n        domain (str): Domain name to associate the scores to.\n    \"\"\"\n    if None in self.nwp_c:\n        self.nwp_c[domain] = self.nwp_c.pop(None)\n    if None in self.acp_c:\n        self.acp_c[domain] = self.acp_c.pop(None)\n    if None in self.acr_c:\n        self.acr_c[domain] = self.acr_c.pop(None)\n    if None in self.swp_c:\n        self.swp_c[domain] = self.swp_c.pop(None)\n
"},{"location":"internals/#kebbie.scorer.Scorer._score_accuracy","title":"_score_accuracy(c)","text":"

Helper method to compute the accuracy given a prediction count.

This method return a dictionary with 3 metrics
  • Accuracy
  • Top3 accuracy
  • Total number of predictions

Parameters:

Name Type Description Default c Count

Count object to use to compute the accuracy.

required

Returns:

Type Description Dict

Dictionary with the computed metrics.

Source code in kebbie/scorer.py
def _score_accuracy(self, c: Count) -> Dict:\n    \"\"\"Helper method to compute the accuracy given a prediction count.\n\n    This method return a dictionary with 3 metrics :\n     * Accuracy\n     * Top3 accuracy\n     * Total number of predictions\n\n    Args:\n        c (Count): Count object to use to compute the accuracy.\n\n    Returns:\n        Dictionary with the computed metrics.\n    \"\"\"\n    return {\n        \"accuracy\": round_to_n(c.correct / c.total) if c.total != 0 else 0,\n        \"top3_accuracy\": round_to_n(c.correct_3 / c.total) if c.total != 0 else 0,\n        \"n\": c.total,\n    }\n
"},{"location":"internals/#kebbie.scorer.Scorer._score_precision_recall","title":"_score_precision_recall(no_typo_c, typo_c, beta)","text":"

Helper method to compute the precision and recall for auto-correction.

This method return a dictionary with several metrics
  • Accuracy
  • Precision
  • Recall
  • F-score
  • Top3 accuracy
  • Top3 precision
  • Top3 recall
  • Top3 F-score
  • Number of predictions with a typo
  • Total number of predictions

For auto-correction, we need 2 Count objects : the counts of typos, and the counts of non-typo (to compute the True Negative and False Positive metrics).

Parameters:

Name Type Description Default no_typo_c Count

Count object for the predictions where no typo were added.

required typo_c Count

Count object for the predictions where typos were added.

required beta float

Beta to use for computing the F-beta score.

required

Returns:

Type Description Dict

Dictionary with the computed metrics.

Source code in kebbie/scorer.py
def _score_precision_recall(self, no_typo_c: Count, typo_c: Count, beta: float) -> Dict:\n    \"\"\"Helper method to compute the precision and recall for\n    auto-correction.\n\n    This method return a dictionary with several metrics :\n     * Accuracy\n     * Precision\n     * Recall\n     * F-score\n     * Top3 accuracy\n     * Top3 precision\n     * Top3 recall\n     * Top3 F-score\n     * Number of predictions with a typo\n     * Total number of predictions\n\n    For auto-correction, we need 2 Count objects : the counts of typos, and\n    the counts of non-typo (to compute the True Negative and False Positive\n    metrics).\n\n    Args:\n        no_typo_c (Count): Count object for the predictions where no typo\n            were added.\n        typo_c (Count): Count object for the predictions where typos were\n            added.\n        beta (float): Beta to use for computing the F-beta score.\n\n    Returns:\n        Dictionary with the computed metrics.\n    \"\"\"\n    # The first step is to divide the counts into TN, FP, TP, FN\n    tn = no_typo_c.correct\n    fp = no_typo_c.total - no_typo_c.correct\n    tp = typo_c.correct\n    fn = typo_c.total - typo_c.correct\n\n    tn_3 = no_typo_c.correct_3\n    fp_3 = no_typo_c.total - no_typo_c.correct_3\n    tp_3 = typo_c.correct_3\n    fn_3 = typo_c.total - typo_c.correct_3\n\n    # Then we compute the metrics\n    p = precision(tp=tp, fp=fp)\n    r = recall(tp=tp, fn=fn)\n\n    p_3 = precision(tp=tp_3, fp=fp_3)\n    r_3 = recall(tp=tp_3, fn=fn_3)\n\n    return {\n        \"accuracy\": round_to_n(accuracy(tp=tp, tn=tn, fp=fp, fn=fn)),\n        \"precision\": round_to_n(p),\n        \"recall\": round_to_n(r),\n        \"fscore\": round_to_n(fbeta(precision=p, recall=r, beta=beta)),\n        \"top3_accuracy\": round_to_n(accuracy(tp=tp_3, tn=tn_3, fp=fp_3, fn=fn_3)),\n        \"top3_precision\": round_to_n(p_3),\n        \"top3_recall\": round_to_n(r_3),\n        \"top3_fscore\": round_to_n(fbeta(precision=p_3, 
recall=r_3, beta=beta)),\n        \"n_typo\": typo_c.total,\n        \"n\": no_typo_c.total + typo_c.total,\n    }\n
"},{"location":"internals/#kebbie.scorer.Scorer._score_performances","title":"_score_performances(memories, runtimes)","text":"

Helper method to compute metrics related to the memory & runtime.

This method returns a dictionary with several metrics
  • The mean memory consumption
  • The min memory consumption
  • The max memory consumption
  • The mean running time
  • The fastest running time
  • The slowest running time

Parameters:

Name Type Description Default memories List[int]

List of memories consumptions for a specific operation.

required runtimes List[int]

List of runtimes for a specific operation.

required

Returns:

Type Description Dict

Dictionary with the computed metrics.

Source code in kebbie/scorer.py
def _score_performances(self, memories: List[int], runtimes: List[int]) -> Dict:\n    \"\"\"Helper method to compute metrics related to the memory & runtime.\n\n    This method returns a dictionary with several metrics :\n     * The mean memory consumption\n     * The min memory consumption\n     * The max memory consumption\n     * The mean running time\n     * The fastest running time\n     * The slowest running time\n\n    Args:\n        memories (List[int]): List of memories consumptions for a\n            specific operation.\n        runtimes (List[int]): List of runtimes for a specific operation.\n\n    Returns:\n        Dictionary with the computed metrics.\n    \"\"\"\n    perf = {\n        \"mean_memory\": stats.mean(memories) if memories else 0,\n        \"min_memory\": min(memories) if memories else 0,\n        \"max_memory\": max(memories) if memories else 0,\n        \"mean_runtime\": stats.mean(runtimes) if runtimes else 0,\n        \"fastest_runtime\": min(runtimes) if runtimes else 0,\n        \"slowest_runtime\": max(runtimes) if runtimes else 0,\n    }\n\n    if self.human_readable:\n        perf = {\n            name: human_readable_memory(x) if name.endswith(\"memory\") else human_readable_runtime(x)\n            for name, x in perf.items()\n        }\n\n    return perf\n
"},{"location":"internals/#kebbie.scorer.Scorer.score","title":"score(beta=DEFAULT_BETA)","text":"

Method that computes the final scores (as well as some alternative metrics that can bring insight in the capabilities of the model), and output these in an organized dictionary.

Parameters:

Name Type Description Default beta float

Beta to use for computing the F-beta score.

DEFAULT_BETA

Returns:

Type Description Dict

Dictionary containing the computed scores and metrics for the

Dict

model tested.

Source code in kebbie/scorer.py
def score(self, beta: float = DEFAULT_BETA) -> Dict:  # noqa: C901\n    \"\"\"Method that computes the final scores (as well as some alternative\n    metrics that can bring insight in the capabilities of the model), and\n    output these in an organized dictionary.\n\n    Args:\n        beta (float, optional): Beta to use for computing the F-beta score.\n\n    Returns:\n        Dictionary containing the computed scores and metrics for the\n        model tested.\n    \"\"\"\n    # --- Next-word prediction ---\n    # Group scores by domain\n    per = defaultdict(Count)\n    for domain, c in self.nwp_c.items():\n        per[domain] += c\n    total_c = sum(per.values(), Count())\n    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n    # Task results\n    nwp = {\n        \"score\": self._score_accuracy(total_c),\n        \"per_domain\": per_domain,\n        \"performances\": self._score_performances(self.nwp_memories, self.nwp_runtimes),\n    }\n\n    # --- Auto-completion ---\n    # Group scores by domain\n    per = defaultdict(Count)\n    for domain, d1 in self.acp_c.items():\n        for has_typo, d2 in d1.items():\n            for compl_rate, c in d2.items():\n                per[domain] += c\n    total_c = sum(per.values(), Count())\n    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n    # Group scores by completion rate\n    per = defaultdict(Count)\n    for domain, d1 in self.acp_c.items():\n        for has_typo, d2 in d1.items():\n            for compl_rate, c in d2.items():\n                per[compl_rate] += c\n    per_compl_rate = {\n        \"<25%\": self._score_accuracy(sum((c for k, c in per.items() if k < 0.25), Count())),\n        \"25%~50%\": self._score_accuracy(sum((c for k, c in per.items() if 0.25 <= k < 0.5), Count())),\n        \"50%~75%\": self._score_accuracy(sum((c for k, c in per.items() if 0.5 <= k < 0.75), Count())),\n        \">75%\": self._score_accuracy(sum((c for k, c in per.items() if 0.75 <= 
k), Count())),\n    }\n\n    # Group scores by with_typo / without_typo\n    per = defaultdict(Count)\n    for domain, d1 in self.acp_c.items():\n        for has_typo, d2 in d1.items():\n            for compl_rate, c in d2.items():\n                per[has_typo] += c\n    per_other = {k: self._score_accuracy(per[k]) for k in [WITHOUT_TYPO, WITH_TYPO]}\n\n    # Task results\n    acp = {\n        \"score\": self._score_accuracy(total_c),\n        \"per_domain\": per_domain,\n        \"per_completion_rate\": per_compl_rate,\n        \"per_other\": per_other,\n        \"performances\": self._score_performances(self.acp_memories, self.acp_runtimes),\n    }\n\n    # --- Auto-correction ---\n    # Group scores by domain\n    no_typo_per, typo_per = defaultdict(Count), defaultdict(Count)\n    for domain, d1 in self.acr_c.items():\n        for typo, c in d1.items():\n            if typo is None:\n                no_typo_per[domain] += c\n            else:\n                typo_per[domain] += c\n    no_typo_total_c = sum(no_typo_per.values(), Count())\n    typo_total_c = sum(typo_per.values(), Count())\n    per_domain = {k: self._score_precision_recall(no_typo_per[k], typo_per[k], beta=beta) for k in no_typo_per}\n\n    # Group scores by typo type\n    no_typo_c, typo_per = Count(), defaultdict(Count)\n    for domain, d1 in self.acr_c.items():\n        for typo, c in d1.items():\n            if typo is None:\n                no_typo_c += c\n            else:\n                typo_per[typo] += c\n    # Divide the total count of no-typo into each type of typos with the right proportions\n    no_typo_per = defaultdict(Count, {k: no_typo_c * (c.total / typo_total_c.total) for k, c in typo_per.items()})\n    per_typo_type = {t.name: self._score_precision_recall(no_typo_per[t], typo_per[t], beta=beta) for t in Typo}\n    per_n_typo = {\n        \"1\": self._score_precision_recall(\n            sum((c for k, c in no_typo_per.items() if isinstance(k, Typo)), Count()),\n            
sum((c for k, c in typo_per.items() if isinstance(k, Typo)), Count()),\n            beta=beta,\n        ),\n        \"2\": self._score_precision_recall(no_typo_per[2], typo_per[2], beta=beta),\n        \"3+\": self._score_precision_recall(\n            sum((c for k, c in no_typo_per.items() if isinstance(k, int) and k > 2), Count()),\n            sum((c for k, c in typo_per.items() if isinstance(k, int) and k > 2), Count()),\n            beta=beta,\n        ),\n    }\n\n    # Task results\n    acr = {\n        \"score\": self._score_precision_recall(no_typo_total_c, typo_total_c, beta=beta),\n        \"per_domain\": per_domain,\n        \"per_typo_type\": per_typo_type,\n        \"per_number_of_typos\": per_n_typo,\n        \"performances\": self._score_performances(self.acr_memories, self.acr_runtimes),\n    }\n\n    # --- Swipe resolution ---\n    # Group scores by domain\n    per = defaultdict(Count)\n    for domain, c in self.swp_c.items():\n        per[domain] += c\n    total_c = sum(per.values(), Count())\n    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n    # Task results\n    swp = {\n        \"score\": self._score_accuracy(total_c),\n        \"per_domain\": per_domain,\n        \"performances\": self._score_performances(self.swp_memories, self.swp_runtimes),\n    }\n\n    # Final results\n    results = {\n        \"next_word_prediction\": nwp,\n        \"auto_completion\": acp,\n        \"auto_correction\": acr,\n        \"swipe_resolution\": swp,\n    }\n\n    # Add the overall score\n    results[\"overall_score\"] = one_score(results)\n\n    return results\n
"},{"location":"internals/#kebbie.scorer.dd_x_layers","title":"dd_x_layers(n_layers=1)","text":"

Helper function for creating a nested defaultdict, with a specified number of nest level. The end object is a Count.

Parameters:

Name Type Description Default n_layers int

Number of layer for the defaultdict.

1

Returns:

Type Description defaultdict

Created nested defaultdict.

Source code in kebbie/scorer.py
def dd_x_layers(n_layers: int = 1) -> defaultdict:\n    \"\"\"Helper function for creating a nested defaultdict, with a specified\n    number of nest level. The end object is a Count.\n\n    Args:\n        n_layers (int): Number of layer for the defaultdict.\n\n    Returns:\n        Created nested defaultdict.\n    \"\"\"\n    assert n_layers > 0, f\"A default dict have at least 1 layer ({n_layers} given)\"\n    if n_layers == 1:\n        return defaultdict(Count)\n    else:\n        return defaultdict(partial(dd_x_layers, n_layers=n_layers - 1))\n
"},{"location":"internals/#kebbie.scorer.one_score","title":"one_score(results)","text":"

One Score to rule them all, One Score to find them, One Score to bring them all and in the darkness bind them.

This function is here to gather the various testing metrics of a JET file in a single number, to easily compare models.

We take a single metric for each task, and weight them based on the importance of the task (these metrics already have the same scale : between 0 and 1).

For NWP and ACP we take a top-3 metric, because these tasks usually involve a user action from a proposed list. For ACR and SWP, we take a top-1 metric, since usually it's automatically applied without user input.

Parameters:

Name Type Description Default results Dict

Testing results. Should be a dictionary containing all the metrics (used to compute the one score).

required

Returns:

Type Description float

One score, computed from the results given.

Source code in kebbie/scorer.py
def one_score(results: Dict) -> float:\n    \"\"\"One Score to rule them all, One Score to find them, One Score to bring\n    them all and in the darkness bind them.\n\n    This function is here to gather the various testing metrics of a JET file\n    in a single number, to easily compare models.\n\n    We take a single metric for each task, and weight them based on the\n    importance of the task (these metrics already have the same scale : between\n    0 and 1).\n\n    For NWP and ACP we take a top-3 metric, because these tasks usually involve\n    a user action from a proposed list. For ACR and SWP, we take a top-1\n    metric, since usually it's automatically applied without user input.\n\n    Args:\n        results (Dict): Testing results. Should be a dictionary containing all\n            the metrics (used to compute the one score).\n\n    Returns:\n        One score, computed from the results given.\n    \"\"\"\n    nwp = results[\"next_word_prediction\"][\"score\"][\"top3_accuracy\"]\n    acp = results[\"auto_completion\"][\"score\"][\"top3_accuracy\"]\n    acr = results[\"auto_correction\"][\"score\"][\"fscore\"]\n    swp = results[\"swipe_resolution\"][\"score\"][\"accuracy\"]\n\n    return 0.15 * nwp + 0.2 * acp + 0.4 * acr + 0.25 * swp\n
"},{"location":"internals/#tokenizerpy","title":"tokenizer.py","text":"

Module defining BasicTokenizer, very basic tokenizer to separate a sentence into words.

"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer","title":"BasicTokenizer","text":"

A basic tokenizer, used for regular latin languages. This tokenizer simply use space as word separator. Since it is used for testing only, we don't need to care about punctuations, etc...

Source code in kebbie/tokenizer.py
class BasicTokenizer:\n    \"\"\"A basic tokenizer, used for regular latin languages.\n    This tokenizer simply use space as word separator. Since it is used for\n    testing only, we don't need to care about punctuations, etc...\n    \"\"\"\n\n    def preprocess(self, sentence: str) -> str:\n        \"\"\"Method for simple preprocessing.\n\n        The goal of this function is not to provide an extensive and clean\n        preprocessing. The goal is just to normalize some characters (that\n        are not in our keyboard, so the user can't officially type them) into\n        their normal counterpart, that are in the keyboard.\n\n        Args:\n            sentence (str): String to normalize.\n\n        Returns:\n            Normalized string.\n        \"\"\"\n        # Replace things that are like \"\n        sentence = sentence.replace(\"\u201c\", '\"').replace(\"\u201d\", '\"').replace(\"\u201e\", '\"')\n\n        # Replace things that are like '\n        sentence = sentence.replace(\"\u2019\", \"'\").replace(\"\u02bb\", \"'\").replace(\"\u2018\", \"'\").replace(\"\u00b4\", \"'\").replace(\"\u02bc\", \"'\")\n\n        # Replace things that are like -\n        sentence = sentence.replace(\"\u2013\", \"-\").replace(\"\u2014\", \"-\").replace(\"\u2011\", \"-\").replace(\"\u2212\", \"-\").replace(\"\u30fc\", \"-\")\n\n        # Replace other punctuations\n        sentence = sentence.replace(\"\u2026\", \"...\").replace(\"\u201a\", \",\").replace(\"\u2024\", \".\")\n\n        # TODO: Each keyboard has its own way to deal with punctuation\n        # (applying auto-correction or not, displaying next-word prediction or\n        # not, etc...). 
So for now we just get rid of the punctuations, it's a\n        # convenient shortcut and it's fair to all keyboards.\n        # Eventually we should find a better way to deal with that.\n        sentence = re.sub(r\"\\s*\\.+\\s*\", \" \", sentence)\n        sentence = re.sub(r\"\\s*[,:;\\(\\)\\\"!?\\[\\]\\{\\}~]\\s*\", \" \", sentence)\n\n        return sentence\n\n    def word_split(self, sentence: str) -> List[str]:\n        \"\"\"Method for splitting a sentence into a list of words.\n\n        Args:\n            sentence (str): Sentence to split.\n\n        Returns:\n            List of words from the sentence.\n        \"\"\"\n        return sentence.strip().split()\n\n    def update_context(self, context: str, word: str) -> str:\n        \"\"\"Method for updating a context, given a word that was typed.\n\n        Args:\n            context (str): Existing context.\n            word (str): Word being typed.\n\n        Returns:\n            Updated context.\n        \"\"\"\n        return context + word + \" \"\n
"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer.preprocess","title":"preprocess(sentence)","text":"

Method for simple preprocessing.

The goal of this function is not to provide an extensive and clean preprocessing. The goal is just to normalize some characters (that are not in our keyboard, so the user can't officially type them) into their normal counterpart, that are in the keyboard.

Parameters:

Name Type Description Default sentence str

String to normalize.

required

Returns:

Type Description str

Normalized string.

Source code in kebbie/tokenizer.py
def preprocess(self, sentence: str) -> str:\n    \"\"\"Method for simple preprocessing.\n\n    The goal of this function is not to provide an extensive and clean\n    preprocessing. The goal is just to normalize some characters (that\n    are not in our keyboard, so the user can't officially type them) into\n    their normal counterpart, that are in the keyboard.\n\n    Args:\n        sentence (str): String to normalize.\n\n    Returns:\n        Normalized string.\n    \"\"\"\n    # Replace things that are like \"\n    sentence = sentence.replace(\"\u201c\", '\"').replace(\"\u201d\", '\"').replace(\"\u201e\", '\"')\n\n    # Replace things that are like '\n    sentence = sentence.replace(\"\u2019\", \"'\").replace(\"\u02bb\", \"'\").replace(\"\u2018\", \"'\").replace(\"\u00b4\", \"'\").replace(\"\u02bc\", \"'\")\n\n    # Replace things that are like -\n    sentence = sentence.replace(\"\u2013\", \"-\").replace(\"\u2014\", \"-\").replace(\"\u2011\", \"-\").replace(\"\u2212\", \"-\").replace(\"\u30fc\", \"-\")\n\n    # Replace other punctuations\n    sentence = sentence.replace(\"\u2026\", \"...\").replace(\"\u201a\", \",\").replace(\"\u2024\", \".\")\n\n    # TODO: Each keyboard has its own way to deal with punctuation\n    # (applying auto-correction or not, displaying next-word prediction or\n    # not, etc...). So for now we just get rid of the punctuations, it's a\n    # convenient shortcut and it's fair to all keyboards.\n    # Eventually we should find a better way to deal with that.\n    sentence = re.sub(r\"\\s*\\.+\\s*\", \" \", sentence)\n    sentence = re.sub(r\"\\s*[,:;\\(\\)\\\"!?\\[\\]\\{\\}~]\\s*\", \" \", sentence)\n\n    return sentence\n
"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer.word_split","title":"word_split(sentence)","text":"

Method for splitting a sentence into a list of words.

Parameters:

Name Type Description Default sentence str

Sentence to split.

required

Returns:

Type Description List[str]

List of words from the sentence.

Source code in kebbie/tokenizer.py
def word_split(self, sentence: str) -> List[str]:\n    \"\"\"Method for splitting a sentence into a list of words.\n\n    Args:\n        sentence (str): Sentence to split.\n\n    Returns:\n        List of words from the sentence.\n    \"\"\"\n    return sentence.strip().split()\n
"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer.update_context","title":"update_context(context, word)","text":"

Method for updating a context, given a word that was typed.

Parameters:

Name Type Description Default context str

Existing context.

required word str

Word being typed.

required

Returns:

Type Description str

Updated context.

Source code in kebbie/tokenizer.py
def update_context(self, context: str, word: str) -> str:\n    \"\"\"Method for updating a context, given a word that was typed.\n\n    Args:\n        context (str): Existing context.\n        word (str): Word being typed.\n\n    Returns:\n        Updated context.\n    \"\"\"\n    return context + word + \" \"\n
"},{"location":"internals/#utilspy","title":"utils.py","text":"

Various utils function used by kebbie.

"},{"location":"internals/#kebbie.utils.profile_fn","title":"profile_fn(fn, *args, **kwargs)","text":"

Profile the runtime and memory usage of the given function.

Note that it will only account for memory allocated by python (if you use a library in C/C++ that does its own allocation, it won't report it).

Parameters:

Name Type Description Default fn Callable

Function to profile.

required *args Any

Positional arguments to pass to the given function.

() **kwargs Any

Keywords arguments to pass to the given function.

{}

Returns:

Type Description Any

The return value of the function called.

int

The memory usage (in bytes).

int

The runtime (in nano seconds).

Source code in kebbie/utils.py
def profile_fn(fn: Callable, *args: Any, **kwargs: Any) -> Tuple[Any, int, int]:\n    \"\"\"Profile the runtime and memory usage of the given function.\n\n    Note that it will only account for memory allocated by python (if you use\n    a library in C/C++ that does its own allocation, it won't report it).\n\n    Args:\n        fn (Callable): Function to profile.\n        *args: Positional arguments to pass to the given function.\n        **kwargs: Keywords arguments to pass to the given function.\n\n    Returns:\n        The return value of the function called.\n        The memory usage (in bytes).\n        The runtime (in nano seconds).\n    \"\"\"\n    tracemalloc.start()\n    t0 = time.time()\n\n    result = fn(*args, **kwargs)\n\n    runtime = time.time() - t0\n    _, memory = tracemalloc.get_traced_memory()\n\n    return result, memory, runtime * SEC_TO_NANOSEC\n
"},{"location":"internals/#kebbie.utils.euclidian_dist","title":"euclidian_dist(p1, p2)","text":"

Function computing the euclidian distance between 2 points.

Parameters:

Name Type Description Default p1 Tuple[float, float]

Point 1.

required p2 Tuple[float, float]

Point 2.

required

Returns:

Type Description float

Euclidian distance between the 2 given points.

Source code in kebbie/utils.py
def euclidian_dist(p1: Tuple[float, float], p2: Tuple[float, float]) -> float:\n    \"\"\"Function computing the euclidian distance between 2 points.\n\n    Args:\n        p1 (Tuple[float, float]): Point 1.\n        p2 (Tuple[float, float]): Point 2.\n\n    Returns:\n        Euclidian distance between the 2 given points.\n    \"\"\"\n    return math.sqrt(sum((a - b) ** 2 for a, b in zip(p1, p2)))\n
"},{"location":"internals/#kebbie.utils.load_keyboard","title":"load_keyboard(lang='en-US')","text":"

Load the keyboard data for the given language.

For now, only en-US is supported.

Parameters:

Name Type Description Default lang str

Language of the keyboard to load.

'en-US'

Returns:

Type Description Dict

The keyboard data.

Source code in kebbie/utils.py
def load_keyboard(lang: str = \"en-US\") -> Dict:\n    \"\"\"Load the keyboard data for the given language.\n\n    For now, only `en-US` is supported.\n\n    Args:\n        lang (str, optional): Language of the keyboard to load.\n\n    Returns:\n        The keyboard data.\n    \"\"\"\n    layout_folder = Path(__file__).parent / \"layouts\"\n    with open(layout_folder / f\"{lang}.json\", \"r\") as f:\n        keyboard = json.load(f)\n    return keyboard\n
"},{"location":"internals/#kebbie.utils.strip_accents","title":"strip_accents(s)","text":"

Util function for removing accents from a given string.

Parameters:

Name Type Description Default s str

Accented string.

required

Returns:

Type Description str

Same string, without accent.

Source code in kebbie/utils.py
def strip_accents(s: str) -> str:\n    \"\"\"Util function for removing accents from a given string.\n\n    Args:\n        s (str): Accented string.\n\n    Returns:\n        Same string, without accent.\n    \"\"\"\n    nfkd_form = unicodedata.normalize(\"NFKD\", s)\n    return \"\".join([c for c in nfkd_form if not unicodedata.combining(c)])\n
"},{"location":"internals/#kebbie.utils.sample","title":"sample(proba)","text":"

Simple function to sample an event with the given probability. For example, calling sample(0.95) will return True in 95% cases, and False in 5% cases.

Parameters:

Name Type Description Default proba float

Probability of the event to happen. Should be between 0 and 1 (included).

required

Returns:

Type Description bool

True if the event was sampled, False otherwise.

Source code in kebbie/utils.py
def sample(proba: float) -> bool:\n    \"\"\"Simple function to sample an event with the given probability.\n    For example, calling `sample(0.95)` will return `True` in 95% cases, and\n    `False` in 5% cases.\n\n    Args:\n        proba (float): Probability of the event to happen. Should be between 0\n            and 1 (included).\n\n    Returns:\n        `True` if the event was sampled, `False` otherwise.\n    \"\"\"\n    assert 0 <= proba <= 1, f\"`{proba}` is not a valid probability (should be between 0 and 1)\"\n    if proba == 0:\n        return False\n    elif proba == 1:\n        return True\n    else:\n        return random.choices([True, False], weights=[proba, 1 - proba])[0]\n
"},{"location":"internals/#kebbie.utils.sample_among","title":"sample_among(probs, with_none=True)","text":"

Function that sample an event among several with different probabilities.

Parameters:

Name Type Description Default probs Dict[Any, float]

Dictionary representing the different events and their probabilities. Each probability should be above 0 and their sum should not exceed 1.

required with_none bool

If set to True, add a None option (no event sampled).

True

Returns:

Type Description Any

The corresponding key of the event sampled.

Source code in kebbie/utils.py
def sample_among(probs: Dict[Any, float], with_none: bool = True) -> Any:\n    \"\"\"Function that sample an event among several with different\n    probabilities.\n\n    Args:\n        probs (Dict[Any, float]): Dictionary representing the different events\n            and their probabilities. Each probability should be above 0 and\n            their sum should not exceed 1.\n        with_none (bool): If set to `True`, add a `None` option (no event\n            sampled).\n\n    Returns:\n        The corresponding key of the event sampled.\n    \"\"\"\n    options = list(probs.keys())\n    weights = list(probs.values())\n    assert (\n        all(w >= 0 for w in weights) and sum(weights) <= 1\n    ), \"The numbers given are not a probability (should be above 0 and their sum should not exceed 1)\"\n\n    if with_none:\n        options.append(None)\n        weights.append(1 - sum(weights))\n\n    return random.choices(options, weights=weights)[0]\n
"},{"location":"internals/#kebbie.utils.sample_partial_word","title":"sample_partial_word(keystrokes, word, true_word)","text":"

Sample a partial word from a given word, and extract the corresponding keystrokes as well.

Sampling is done with increasing weights (more chances to sample a longer list). For example if the list represent the keystrokes of \"abcdef\", the probabilities are as follow: * \"a\" : 1/15 * \"ab\" : 2/15 * \"abc\" : 3/15 * \"abcd\" : 4/15 * \"abcde\" : 5/15

Parameters:

Name Type Description Default keystrokes List[Optional[Tuple[float, float]]]

Complete list of keystrokes, representing a full word.

required word str

The word corresponding to the keystrokes.

required true_word str

Actual word (without typo). Necessary to ensure the sampled keystrokes are partial.

required

Returns:

Type Description List[Optional[Tuple[float, float]]]

The partial list of keystrokes (sampled from the given word).

str

The partial word (sampled from the given word).

Source code in kebbie/utils.py
def sample_partial_word(\n    keystrokes: List[Optional[Tuple[float, float]]], word: str, true_word: str\n) -> Tuple[List[Optional[Tuple[float, float]]], str]:\n    \"\"\"Sample a partial word from a given word, and extract the corresponding\n    keystrokes as well.\n\n    Sampling is done with increasing weights (more chances to sample a longer\n    list). For example if the list represent the keystrokes of \"abcdef\", the\n    probabilities are as follow:\n     * \"a\" :     1/15\n     * \"ab\" :    2/15\n     * \"abc\" :   3/15\n     * \"abcd\" :  4/15\n     * \"abcde\" : 5/15\n\n    Args:\n        keystrokes (List[Optional[Tuple[float, float]]]): Complete list of\n            keystrokes, representing a full word.\n        word (str): The word corresponding to the keystrokes.\n        true_word (str): Actual word (without typo). Necessary to ensure the\n            sampled keystrokes are partial.\n\n    Returns:\n        The partial list of keystrokes (sampled from the given word).\n        The partial word (sampled from the given word).\n    \"\"\"\n    r = range(1, min(len(true_word), len(word)))\n    s = random.choices(r, weights=r)[0]\n    return keystrokes[:s], word[:s]\n
"},{"location":"internals/#kebbie.utils.accuracy","title":"accuracy(tp, tn, fp, fn)","text":"

Function computing the accuracy.

Parameters:

Name Type Description Default tp int

Number of True Positive.

required tn int

Number of True Negative.

required fp int

Number of False Positive.

required fn int

Number of False Negative.

required

Returns:

Type Description float

Accuracy.

Source code in kebbie/utils.py
def accuracy(tp: int, tn: int, fp: int, fn: int) -> float:\n    \"\"\"Function computing the precision.\n\n    Args:\n        tp (int): Number of True Positive.\n        tn (int): Number of True Negative.\n        fp (int): Number of False Positive.\n        fn (int): Number of False Negative.\n\n    Returns:\n        Accuracy.\n    \"\"\"\n    try:\n        return (tp + tn) / (tp + tn + fp + fn)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.precision","title":"precision(tp, fp)","text":"

Function computing the precision.

Parameters:

Name Type Description Default tp int

Number of True Positive.

required fp int

Number of False Positive.

required

Returns:

Type Description float

Precision.

Source code in kebbie/utils.py
def precision(tp: int, fp: int) -> float:\n    \"\"\"Function computing the precision.\n\n    Args:\n        tp (int): Number of True Positive.\n        fp (int): Number of False Positive.\n\n    Returns:\n        Precision.\n    \"\"\"\n    try:\n        return tp / (tp + fp)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.recall","title":"recall(tp, fn)","text":"

Function computing the recall.

Parameters:

Name Type Description Default tp int

Number of True Positive.

required fn int

Number of False Negative.

required

Returns:

Type Description float

Recall.

Source code in kebbie/utils.py
def recall(tp: int, fn: int) -> float:\n    \"\"\"Function computing the recall.\n\n    Args:\n        tp (int): Number of True Positive.\n        fn (int): Number of False Negative.\n\n    Returns:\n        Recall.\n    \"\"\"\n    try:\n        return tp / (tp + fn)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.fbeta","title":"fbeta(precision, recall, beta=1)","text":"

Function computing the F-beta score (which is a generalization of the F1 score).

The value of Beta changes how much we weight recall versus precision:
  • For beta=0.5, Precision is twice as important as Recall
  • For beta=2, Recall is twice as important as Precision

Parameters:

Name Type Description Default precision float

Precision.

required recall float

Recall.

required beta float

Beta factor.

1

Returns:

Type Description float

F-beta score.

Source code in kebbie/utils.py
def fbeta(precision: float, recall: float, beta: float = 1) -> float:\n    \"\"\"Function computing the F-beta score (which is a generalization of the\n    F1 score).\n\n    The value of Beta changes how much we weight recall versus precision:\n     * For beta=0.5, Precision is twice as important as Recall\n     * For beta=2, Recall is twice as important as Precision\n\n    Args:\n        precision (float): Precision.\n        recall (float): Recall.\n        beta (float): Beta factor.\n\n    Returns:\n        F-beta score.\n    \"\"\"\n    try:\n        return (1 + beta**2) * precision * recall / (beta**2 * precision + recall)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.round_to_n","title":"round_to_n(x, n=2)","text":"

Util function to round a given number to n significant digits.

Parameters:

Name Type Description Default x float

Number to round.

required n int

Number of significant digits to use.

2

Returns:

Type Description float

Rounded number.

Source code in kebbie/utils.py
def round_to_n(x: float, n: int = 2) -> float:\n    \"\"\"Util function to round a given number to n significant digits.\n\n    Args:\n        x (float): Number to round.\n        n (int): Number of significant digits to use.\n\n    Returns:\n        Rounded number.\n    \"\"\"\n    return round(x, -int(math.floor(math.log10(x))) + (n - 1)) if x != 0 else 0\n
"},{"location":"internals/#kebbie.utils.human_readable_memory","title":"human_readable_memory(x)","text":"

Given a number in bytes, return a human-readable string of this number, with the right unit.

Parameters:

Name Type Description Default x int

Number in bytes.

required

Returns:

Type Description str

Human-readable version of the given number, with the right unit.

Source code in kebbie/utils.py
def human_readable_memory(x: int) -> str:\n    \"\"\"Given a number in bytes, return a human-readable string of this number,\n    with the right unit.\n\n    Args:\n        x (int): Number in bytes.\n\n    Returns:\n        Human-readable version of the given number, with the right unit.\n    \"\"\"\n    x = round_to_n(x, n=3)\n    for unit in [\"B\", \"KB\", \"MB\", \"GB\"]:\n        if x < 1000:\n            return f\"{x:g} {unit}\"\n\n        x /= 1000\n    return f\"{x:g} TB\"\n
"},{"location":"internals/#kebbie.utils.human_readable_runtime","title":"human_readable_runtime(x)","text":"

Given a number in nanoseconds, return a human-readable string of this number, with the right unit.

Parameters:

Name Type Description Default x int

Number in nanoseconds.

required

Returns:

Type Description str

Human-readable version of the given number, with the right unit.

Source code in kebbie/utils.py
def human_readable_runtime(x: int) -> str:\n    \"\"\"Given a number in nanoseconds, return a human-readable string of this\n    number, with the right unit.\n\n    Args:\n        x (int): Number in nanoseconds.\n\n    Returns:\n        Human-readable version of the given number, with the right unit.\n    \"\"\"\n    x = round_to_n(x, n=3)\n    for unit in [\"ns\", \"\u03bcs\", \"ms\"]:\n        if x < 1000:\n            return f\"{x:g} {unit}\"\n\n        x /= 1000\n    return f\"{x:g} s\"\n
"},{"location":"internals/#kebbie.utils.get_soda_dataset","title":"get_soda_dataset(max_sentences=2000, seed=31)","text":"

Load the SODA dataset.

Parameters:

Name Type Description Default max_sentences int

Maximum number of sentences in total in the dataset. They will be shared across domain (50% from the narrative domain, 50% from the dialogue domain).

2000 seed int

Seed to use when shuffling the dataset (since we don't use the whole dataset, it's better to shuffle it before extracting the X first sentences).

31

Returns:

Type Description Dict[str, List[str]]

The dataset, separated into two domains : narrative and dialogue.

Source code in kebbie/utils.py
def get_soda_dataset(max_sentences: int = 2_000, seed: int = 31) -> Dict[str, List[str]]:\n    \"\"\"Load the SODA dataset.\n\n    Args:\n        max_sentences (int, optional): Maximum number of sentences in total in\n            the dataset. They will be shared across domain (50% from the\n            `narrative` domain, 50% from the `dialogue` domain).\n        seed (int, optional): Seed to use when shuffling the dataset (since we\n            don't use the whole dataset, it's better to shuffle it before\n            extracting the X first sentences).\n\n    Returns:\n        The dataset, separated into two domains : narrative and dialogue.\n    \"\"\"\n    data = {\"narrative\": [], \"dialogue\": []}\n    max_domain_sentences = max_sentences // 2\n\n    hf_dataset = datasets.load_dataset(\"allenai/soda\", split=\"test\")\n    hf_dataset = hf_dataset.shuffle(seed=seed)\n\n    for sample in hf_dataset:\n        if len(data[\"narrative\"]) >= max_domain_sentences and len(data[\"dialogue\"]) >= max_domain_sentences:\n            break\n\n        if len(data[\"narrative\"]) < max_domain_sentences:\n            data[\"narrative\"].append(sample[\"narrative\"])\n\n        for sen in sample[\"dialogue\"]:\n            if len(data[\"dialogue\"]) < max_domain_sentences:\n                data[\"dialogue\"].append(sen)\n\n    return data\n
"},{"location":"internals/#constants","title":"Constants","text":""},{"location":"internals/#__init__py","title":"__init__.py","text":""},{"location":"internals/#kebbie.SUPPORTED_LANG","title":"SUPPORTED_LANG = ['en-US']","text":""},{"location":"internals/#kebbie.N_MOST_COMMON_MISTAKES","title":"N_MOST_COMMON_MISTAKES = 1000","text":""},{"location":"internals/#kebbie.DEFAULT_SEED","title":"DEFAULT_SEED = 42","text":""},{"location":"internals/#emulatorpy_1","title":"emulator.py","text":""},{"location":"internals/#kebbie.emulator.ANDROID","title":"ANDROID = 'android'","text":""},{"location":"internals/#kebbie.emulator.IOS","title":"IOS = 'ios'","text":""},{"location":"internals/#kebbie.emulator.GBOARD","title":"GBOARD = 'gboard'","text":""},{"location":"internals/#kebbie.emulator.TAPPA","title":"TAPPA = 'tappa'","text":""},{"location":"internals/#kebbie.emulator.FLEKSY","title":"FLEKSY = 'fleksy'","text":""},{"location":"internals/#kebbie.emulator.KBKITPRO","title":"KBKITPRO = 'kbkitpro'","text":""},{"location":"internals/#kebbie.emulator.KBKITOSS","title":"KBKITOSS = 'kbkitoss'","text":""},{"location":"internals/#kebbie.emulator.SWIFTKEY","title":"SWIFTKEY = 'swiftkey'","text":""},{"location":"internals/#kebbie.emulator.KEYBOARD_PACKAGE","title":"KEYBOARD_PACKAGE = {GBOARD: 'com.google.android.inputmethod.latin', SWIFTKEY: 'com.touchtype.swiftkey', TAPPA: 'com.tappa.keyboard'}","text":""},{"location":"internals/#kebbie.emulator.ANDROID_CAPABILITIES","title":"ANDROID_CAPABILITIES = {'platformName': 'android', 'automationName': 'UiAutomator2', 'enableMultiWindows': True, 'deviceName': 'test', 'newCommandTimeout': 3600}","text":""},{"location":"internals/#kebbie.emulator.IOS_CAPABILITIES","title":"IOS_CAPABILITIES = {'platformName': 'iOS', 'automationName': 'XCUITest', 'udid': 'auto', 'xcodeOrgId': '8556JTA4X4', 'xcodeSigningId': 'iPhone Developer', 'useNewWDA': False, 'usePrebuiltWdDA': True, 'startIWDP': True, 'bundleId': 'com.apple.MobileSMS', 'newCommandTimeout': 
3600}","text":""},{"location":"internals/#kebbie.emulator.BROWSER_PAD_URL","title":"BROWSER_PAD_URL = 'https://www.justnotepad.com'","text":""},{"location":"internals/#kebbie.emulator.ANDROID_TYPING_FIELD_CLASS_NAME","title":"ANDROID_TYPING_FIELD_CLASS_NAME = 'android.widget.EditText'","text":""},{"location":"internals/#kebbie.emulator.DUMMY_RECIPIENT","title":"DUMMY_RECIPIENT = '0'","text":""},{"location":"internals/#kebbie.emulator.IOS_TYPING_FIELD_ID","title":"IOS_TYPING_FIELD_ID = 'messageBodyField'","text":""},{"location":"internals/#kebbie.emulator.IOS_START_CHAT_CLASS_NAME","title":"IOS_START_CHAT_CLASS_NAME = 'XCUIElementTypeCell'","text":""},{"location":"internals/#kebbie.emulator.TESSERACT_CONFIG","title":"TESSERACT_CONFIG = '-c tessedit_char_blacklist=0123456789\u201d:!@\u00b7$%&/()=.\u00bf?'","text":""},{"location":"internals/#kebbie.emulator.PREDICTION_DELAY","title":"PREDICTION_DELAY = 0.4","text":""},{"location":"internals/#kebbie.emulator.CONTENT_TO_IGNORE","title":"CONTENT_TO_IGNORE = ['Sticker', 'GIF', 'Clipboard', 'Settings', 'Back', 'Switch input method', 'Paste item', 'Close', 'paintpalette', 'Search Document', 'Microphone', 'gearshape', 'Next Locale', 'paintpalette', 'EmojiCategories/smileysAndPeople', 'EmojiCategories/animalsAndNature', 'EmojiCategories/foodAndDrink', 'EmojiCategories/activity', 'EmojiCategories/travelAndPlaces', 'EmojiCategories/objects', 'EmojiCategories/symbols', 'EmojiCategories/flags', 'Add', 'And', 'Are', '\u201cA\u201d', '\ud83d\ude80']","text":""},{"location":"internals/#kebbie.emulator.CONTENT_TO_RENAME","title":"CONTENT_TO_RENAME = {'Shift': 'shift', 'Delete': 'backspace', 'Backspace': 'backspace', 'Space': 'spacebar', 'space': 'spacebar', 'Emoji button': 'smiley', 'Emoji': 'smiley', 'Keyboard Type - emojis': 'smiley', 'Search': 'enter', 'return': 'enter', 'Enter': 'enter', 'Symbol keyboard': 'numbers', 'Symbols': 'numbers', 'Symbols and numbers': 'numbers', 'Keyboard Type - numeric': 'numbers', 'Voice input': 
'mic', ',, alternatives available, Voice typing, long press to activate': 'mic', 'Close features menu': 'magic', 'Open features menu': 'magic', 'underline': '_', '&amp;': '&', 'ampersand': '&', 'Dash': '-', 'Plus': '+', 'Left parenthesis': '(', 'Right parenthesis': ')', 'slash': '/', 'Apostrophe': \"'\", 'Colon': ':', 'Semicolon': ';', 'Exclamation': '!', 'Question mark': '?', 'Letter keyboard': 'letters', 'Letters': 'letters', 'Keyboard Type - auto': 'letters', 'Digit keyboard': 'numbers', 'More symbols': 'shift', 'Keyboard Type - symbolic': 'shift', 'Double tap for uppercase': 'shift', 'Double tap for caps lock': 'shift', 'capital Q': 'Q', 'capital W': 'W', 'capital E': 'E', 'capital R': 'R', 'capital T': 'T', 'capital Y': 'Y', 'capital U': 'U', 'capital I': 'I', 'Capital I': 'I', 'capital O': 'O', 'capital P': 'P', 'capital A': 'A', 'capital S': 'S', 'capital D': 'D', 'capital F': 'F', 'capital G': 'G', 'capital H': 'H', 'capital J': 'J', 'capital K': 'K', 'capital L': 'L', 'capital Z': 'Z', 'capital X': 'X', 'capital C': 'C', 'capital V': 'V', 'capital B': 'B', 'capital N': 'N', 'capital M': 'M'}","text":""},{"location":"internals/#kebbie.emulator.FLEKSY_LAYOUT","title":"FLEKSY_LAYOUT = {'keyboard_frame': [0, 517, 393, 266], 'lowercase': {'q': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'w': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'e': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'r': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 't': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'y': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'u': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'i': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'o': 
[0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'p': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'a': [0.05740740740740741, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 's': [0.15555555555555556, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'd': [0.25555555555555554, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'f': [0.35462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'g': [0.4546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'h': [0.5546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'j': [0.6546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'k': [0.7546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'l': [0.8555555555555555, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'z': [0.15555555555555556, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'x': [0.25555555555555554, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'c': [0.35462962962962963, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'v': [0.4546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'b': [0.5546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'n': [0.6546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'm': [0.7546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'backspace': [0.8555555555555555, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'numbers': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 
0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}, 'uppercase': {'Q': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'W': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'E': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'R': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'T': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'Y': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'U': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'I': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'O': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'P': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'A': [0.05740740740740741, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'S': [0.15555555555555556, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'D': [0.25555555555555554, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'F': [0.35462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'G': [0.4546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'H': [0.5546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'J': [0.6546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'K': [0.7546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'L': [0.8555555555555555, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 
0.1361111111111111, 0.1643835616438356], 'Z': [0.15555555555555556, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'X': [0.25555555555555554, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'C': [0.35462962962962963, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'V': [0.4546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'B': [0.5546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'N': [0.6546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'M': [0.7546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'backspace': [0.8555555555555555, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'numbers': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}, 'numbers': {'1': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '2': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '3': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '4': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '5': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '6': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '7': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '8': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '9': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 
0.1643835616438356], '0': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '-': [0.007407407407407408, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '/': [0.10462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ':': [0.20462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ';': [0.30462962962962964, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '(': [0.4046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ')': [0.5046296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '$': [0.6046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '&': [0.7046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '@': [0.8046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '\"': [0.9046296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], ',': [0.3101851851851852, 0.5994520547945206, 0.12, 0.1643835616438356], '?': [0.44044444444444447, 0.5994520547945206, 0.12, 0.1643835616438356], '!': [0.5707037037037037, 0.5994520547945206, 0.12, 0.1643835616438356], \"'\": [0.705962962962963, 0.5994520547945206, 0.12, 0.1643835616438356], 'backspace': [0.8551851851851852, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'letters': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 
0.1643835616438356]}}","text":""},{"location":"internals/#gesturepy_1","title":"gesture.py","text":""},{"location":"internals/#kebbie.gesture.MAX_RADIUS","title":"MAX_RADIUS = 16","text":""},{"location":"internals/#kebbie.gesture.MIN_N_POINTS_PER_DIST","title":"MIN_N_POINTS_PER_DIST = 0.1","text":""},{"location":"internals/#kebbie.gesture.MAX_N_POINTS_PER_DIST","title":"MAX_N_POINTS_PER_DIST = 0.25","text":""},{"location":"internals/#kebbie.gesture.MIN_ACCELERATION","title":"MIN_ACCELERATION = 0.2","text":""},{"location":"internals/#kebbie.gesture.MAX_ACCELERATION","title":"MAX_ACCELERATION = 0.5","text":""},{"location":"internals/#layoutpy_1","title":"layout.py","text":""},{"location":"internals/#kebbie.layout.SPACE","title":"SPACE = 'spacebar'","text":""},{"location":"internals/#kebbie.layout.POINT","title":"POINT = '.'","text":""},{"location":"internals/#kebbie.layout.N_ACCENT_PER_LINE","title":"N_ACCENT_PER_LINE = 4","text":""},{"location":"internals/#noise_modelpy_1","title":"noise_model.py","text":""},{"location":"internals/#kebbie.noise_model.DEFAULT_TYPO_PROBS","title":"DEFAULT_TYPO_PROBS = {Typo.TRANSPOSE_CHAR: 0.01, Typo.DELETE_SPELLING_SYMBOL: 0.1, Typo.ADD_SPELLING_SYMBOL: 0, Typo.DELETE_SPACE: 0.01, Typo.ADD_SPACE: 0, Typo.DELETE_PUNCTUATION: 0, Typo.ADD_PUNCTUATION: 0, Typo.DELETE_CHAR: 0.005, Typo.ADD_CHAR: 0.005, Typo.SIMPLIFY_ACCENT: 0.08, Typo.SIMPLIFY_CASE: 0.08, Typo.COMMON_TYPO: 0.05}","text":""},{"location":"internals/#kebbie.noise_model.SPACE","title":"SPACE = ' '","text":""},{"location":"internals/#kebbie.noise_model.DELETIONS","title":"DELETIONS = [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]","text":""},{"location":"internals/#kebbie.noise_model.FRONT_DELETION_MULTIPLIER","title":"FRONT_DELETION_MULTIPLIER = 0.36","text":""},{"location":"internals/#kebbie.noise_model.DEFAULT_SIGMA_RATIO","title":"DEFAULT_SIGMA_RATIO = 
3","text":""},{"location":"internals/#kebbie.noise_model.CACHE_DIR","title":"CACHE_DIR = os.path.expanduser('~/.cache/common_typos/')","text":""},{"location":"internals/#kebbie.noise_model.TWEET_TYPO_CORPUS_URL","title":"TWEET_TYPO_CORPUS_URL = 'https://luululu.com/tweet/typo-corpus-r1.txt'","text":""},{"location":"internals/#oraclepy_1","title":"oracle.py","text":""},{"location":"internals/#kebbie.oracle.CHUNK_SIZE","title":"CHUNK_SIZE = 10","text":""},{"location":"internals/#kebbie.oracle.MAX_CHAR_PER_SENTENCE","title":"MAX_CHAR_PER_SENTENCE = 256","text":""},{"location":"internals/#kebbie.oracle.SWIPE_PROB","title":"SWIPE_PROB = 0.01","text":""},{"location":"internals/#scorerpy_1","title":"scorer.py","text":""},{"location":"internals/#kebbie.scorer.DEFAULT_BETA","title":"DEFAULT_BETA = 0.9","text":""},{"location":"internals/#kebbie.scorer.WITH_TYPO","title":"WITH_TYPO = 'with_typo'","text":""},{"location":"internals/#kebbie.scorer.WITHOUT_TYPO","title":"WITHOUT_TYPO = 'without_typo'","text":""},{"location":"internals/#utilspy_1","title":"utils.py","text":""},{"location":"internals/#kebbie.utils.SEC_TO_NANOSEC","title":"SEC_TO_NANOSEC = 10000000000.0","text":""},{"location":"leaderboard/","title":"Leaderboard","text":"Keyboard Score Next-word prediction Auto-completion Auto-correction Gboard 0.54 0.33 0.79 0.82 iOS keyboard 0.46 0.43 0.8 0.6 Fleksy 0.43 0.27 0.73 0.6 KeyboardKit Pro 0.31 0 0.4 0.58 KeyboardKit Open-source 0 0 0 0.01

Info

The metrics used in this leaderboard are :

  • For next-word prediction : top-3 accuracy
  • For auto-completion : top-3 accuracy
  • For auto-correction : F-score

See Understanding the metrics for more details.

The overall score is a weighted sum of each task's score.

"},{"location":"public_api/","title":"Public API","text":""},{"location":"public_api/#classes","title":"Classes","text":""},{"location":"public_api/#kebbie.correctors.Corrector","title":"Corrector","text":"

Base class for Corrector, which is the component being tested.

Child classes should overwrite auto_correct(), auto_complete(), resolve_swipe(), and predict_next_word().

By default, the implementation for these methods is dummy : just return an empty list of candidates.

Source code in kebbie/correctors.py
class Corrector:\n    \"\"\"Base class for Corrector, which is the component being tested.\n\n    Child classes should overwrite `auto_correct()`, `auto_complete()`,\n    `resolve_swipe()`, and `predict_next_word()`.\n\n    By default, the implementation for these methods is dummy : just return an\n    empty list of candidates.\n    \"\"\"\n\n    def auto_correct(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        word: str,\n    ) -> List[str]:\n        \"\"\"Method used for auto-correction.\n        Given a context and a typed word, this method should return a list of\n        possible candidates for correction.\n\n        Note that the typed word is given both as a plain string, and as a list\n        of keystrokes. The child class overwriting this method can use either\n        of them.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            word (str): Word being typed (corresponding to the keystrokes).\n\n        Returns:\n            The list of correction candidates.\n        \"\"\"\n        return []\n\n    def auto_complete(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        partial_word: str,\n    ) -> List[str]:\n        \"\"\"Method used for auto-completion.\n        Given a context and a partially typed word, this method should return\n        a list of possible candidates for completion.\n\n        Note that the typed word is given both as a plain string, and as a list\n        of keystrokes. 
The child class overwriting this method can use either\n        of them.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            partial_word (str): Partial word being typed (corresponding to the\n                keystrokes).\n\n        Returns:\n            The list of completion candidates.\n        \"\"\"\n        return []\n\n    def resolve_swipe(self, context: str, swipe_gesture: List[Tuple[float, float]]) -> List[str]:\n        \"\"\"Method used for resolving a swipe gesture. Given a context and a\n        swipe gesture, this method should return a list of possible candidates\n        corresponding to this swipe gesture.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            swipe_gesture (List[Tuple[float, float]]): List of positions (x and\n                y coordinates) along the keyboard, representing the swipe\n                gesture.\n\n        Returns:\n            The list of swiped word candidates.\n        \"\"\"\n        return []\n\n    def predict_next_word(self, context: str) -> List[str]:\n        \"\"\"Method used for next-word prediction task. 
Given a context, this\n        method should return a list of possible candidates for next-word.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n\n        Returns:\n            The list of next-word candidates.\n        \"\"\"\n        return []\n\n    def profiled_auto_correct(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `auto_correct` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.auto_correct, *args, **kwargs)\n\n    def profiled_auto_complete(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `auto_complete` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.auto_complete, *args, **kwargs)\n\n    def profiled_resolve_swipe(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `resolve_swipe` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.resolve_swipe, *args, **kwargs)\n\n    def profiled_predict_next_word(self, 
*args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `predict_next_word` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.predict_next_word, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.auto_correct","title":"auto_correct(context, keystrokes, word)","text":"

Method used for auto-correction. Given a context and a typed word, this method should return a list of possible candidates for correction.

Note that the typed word is given both as a plain string, and as a list of keystrokes. The child class overwriting this method can use either of them.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required word str

Word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of correction candidates.

Source code in kebbie/correctors.py
def auto_correct(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    word: str,\n) -> List[str]:\n    \"\"\"Method used for auto-correction.\n    Given a context and a typed word, this method should return a list of\n    possible candidates for correction.\n\n    Note that the typed word is given both as a plain string, and as a list\n    of keystrokes. The child class overwriting this method can use either\n    of them.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        word (str): Word being typed (corresponding to the keystrokes).\n\n    Returns:\n        The list of correction candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.auto_complete","title":"auto_complete(context, keystrokes, partial_word)","text":"

Method used for auto-completion. Given a context and a partially typed word, this method should return a list of possible candidates for completion.

Note that the typed word is given both as a plain string, and as a list of keystrokes. The child class overwriting this method can use either of them.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required partial_word str

Partial word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of completion candidates.

Source code in kebbie/correctors.py
def auto_complete(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    partial_word: str,\n) -> List[str]:\n    \"\"\"Method used for auto-completion.\n    Given a context and a partially typed word, this method should return\n    a list of possible candidates for completion.\n\n    Note that the typed word is given both as a plain string, and as a list\n    of keystrokes. The child class overwriting this method can use either\n    of them.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        partial_word (str): Partial word being typed (corresponding to the\n            keystrokes).\n\n    Returns:\n        The list of completion candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.resolve_swipe","title":"resolve_swipe(context, swipe_gesture)","text":"

Method used for resolving a swipe gesture. Given a context and a swipe gesture, this method should return a list of possible candidates corresponding to this swipe gesture.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required swipe_gesture List[Tuple[float, float]]

List of positions (x and y coordinates) along the keyboard, representing the swipe gesture.

required

Returns:

Type Description List[str]

The list of swiped word candidates.

Source code in kebbie/correctors.py
def resolve_swipe(self, context: str, swipe_gesture: List[Tuple[float, float]]) -> List[str]:\n    \"\"\"Method used for resolving a swipe gesture. Given a context and a\n    swipe gesture, this method should return a list of possible candidates\n    corresponding to this swipe gesture.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        swipe_gesture (List[Tuple[float, float]]): List of positions (x and\n            y coordinates) along the keyboard, representing the swipe\n            gesture.\n\n    Returns:\n        The list of swiped word candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.predict_next_word","title":"predict_next_word(context)","text":"

Method used for next-word prediction task. Given a context, this method should return a list of possible candidates for next-word.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required

Returns:

Type Description List[str]

The list of next-word candidates.

Source code in kebbie/correctors.py
def predict_next_word(self, context: str) -> List[str]:\n    \"\"\"Method used for next-word prediction task. Given a context, this\n    method should return a list of possible candidates for next-word.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n\n    Returns:\n        The list of next-word candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_auto_correct","title":"profiled_auto_correct(*args, **kwargs)","text":"

Profiled (memory & runtime) version of auto_correct method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_auto_correct(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `auto_correct` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.auto_correct, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_auto_complete","title":"profiled_auto_complete(*args, **kwargs)","text":"

Profiled (memory & runtime) version of auto_complete method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_auto_complete(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `auto_complete` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.auto_complete, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_resolve_swipe","title":"profiled_resolve_swipe(*args, **kwargs)","text":"

Profiled (memory & runtime) version of resolve_swipe method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_resolve_swipe(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `resolve_swipe` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.resolve_swipe, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_predict_next_word","title":"profiled_predict_next_word(*args, **kwargs)","text":"

Profiled (memory & runtime) version of predict_next_word method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_predict_next_word(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `predict_next_word` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.predict_next_word, *args, **kwargs)\n
"},{"location":"public_api/#functions","title":"Functions","text":""},{"location":"public_api/#kebbie.evaluate","title":"evaluate(corrector, lang='en-US', custom_keyboard=None, dataset=None, track_mistakes=False, n_most_common_mistakes=N_MOST_COMMON_MISTAKES, n_proc=None, seed=DEFAULT_SEED, beta=DEFAULT_BETA)","text":"

Main function of the kebbie framework, it evaluates the given Corrector.

Parameters:

Name Type Description Default corrector Corrector

The corrector to evaluate.

required lang str

Language to test. For now, only en-US is supported.

'en-US' custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

None dataset Dict[str, List[str]]

Data to use for testing. It should be a dictionary where the key is the name of the domain, and the value is a list of sentences. If None is given, it will use the SODA dataset.

None track_mistakes bool

If True, we will track the most common mistakes of the Corrector (these will be saved as TSV files in the working directory).

False n_most_common_mistakes int

If track_mistakes is set to True, the top X mistakes to record.

N_MOST_COMMON_MISTAKES n_proc int

Number of processes to use. If None, os.cpu_count() is used.

None seed int

Seed to use for running the tests.

DEFAULT_SEED beta float

Beta to use for computing the F-beta score.

DEFAULT_BETA

Raises:

Type Description UnsupportedLanguage

Exception raised if lang is set to a language that is not supported yet.

Returns:

Type Description Dict

The results, in a dictionary.

Source code in kebbie/__init__.py
def evaluate(\n    corrector: Corrector,\n    lang: str = \"en-US\",\n    custom_keyboard: Dict = None,\n    dataset: Dict[str, List[str]] = None,\n    track_mistakes: bool = False,\n    n_most_common_mistakes: int = N_MOST_COMMON_MISTAKES,\n    n_proc: Optional[int] = None,\n    seed: int = DEFAULT_SEED,\n    beta: float = DEFAULT_BETA,\n) -> Dict:\n    \"\"\"Main function of the `kebbie` framework, it evaluates the given\n    Corrector.\n\n    Args:\n        corrector (Corrector): The corrector to evaluate.\n        lang (str, optional): Language to test. For now, only `en-US` is\n            supported.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        dataset (Dict[str, List[str]], optional): Data to use for testing. It\n            should be a dictionary where the key is the name of the domain, and\n            the value is a list of sentences. If `None` is given, it will use\n            the SODA dataset.\n        track_mistakes (bool, optional): If `True`, we will track the most\n            common mistakes of the Corrector (these will be saved as TSV files\n            in the working directory).\n        n_most_common_mistakes (int, optional): If `track_mistakes` is set to\n            `True`, the top X mistakes to record.\n        n_proc (int, optional): Number of processes to use. If `None`,\n            `os.cpu_count()` is used.\n        seed (int): Seed to use for running the tests.\n        beta (float, optional): Beta to use for computing the F-beta score.\n\n    Raises:\n        UnsupportedLanguage: Exception raised if `lang` is set to a language\n            that is not supported yet.\n\n    Returns:\n        The results, in a dictionary.\n    \"\"\"\n    if lang not in SUPPORTED_LANG and custom_keyboard is None:\n        raise UnsupportedLanguage(f\"{lang} is not supported yet. 
List of supported languages : {SUPPORTED_LANG}\")\n\n    if dataset is None:\n        dataset = get_soda_dataset()\n\n    # Create the Oracle, the class used to create test cases and evaluate the scores\n    oracle = Oracle(\n        lang,\n        dataset,\n        custom_keyboard=custom_keyboard,\n        track_mistakes=track_mistakes,\n        n_most_common_mistakes=n_most_common_mistakes,\n        beta=beta,\n    )\n\n    # Run the tests & get the results\n    results = oracle.test(corrector, n_proc=n_proc, seed=seed)\n    return results\n
"},{"location":"public_api/#exceptions","title":"Exceptions","text":""},{"location":"public_api/#kebbie.UnsupportedLanguage","title":"UnsupportedLanguage","text":"

Bases: Exception

Custom Exception when the required language is not supported.

Source code in kebbie/__init__.py
class UnsupportedLanguage(Exception):\n    \"\"\"Custom Exception when the required language is not supported.\"\"\"\n\n    pass\n
"},{"location":"usage/","title":"Usage","text":"

kebbie exposes a class Corrector and a function evaluate().

The user creates a custom class which inherits from Corrector, over-write methods such as auto_correct(), auto_complete(), predict_next_word(), and resolve_swipe(). Then the user calls evaluate() with the custom Corrector, which will run the benchmark and return the results as a Dictionary (it contains various metrics for each task).

Let's see how to do that in details with a basic example : we will use pyspellchecker, a pure-Python spell-checking library, and test it using kebbie to see how well it performs.

"},{"location":"usage/#creating-your-own-corrector","title":"Creating your own Corrector","text":"

First, we define a subclass of Corrector, and we implement the constructor.

In our case, the constructor will simply initialize the pyspellchecker library :

from spellchecker import SpellChecker\nfrom kebbie import Corrector\n\n\nclass ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n

For this example we are only interested in auto-correction (spell-checking). So we need to over-write the auto_correct() method.

The implementation is straightforward thanks to pyspellchecker :

from typing import List\n\nfrom spellchecker import SpellChecker\nfrom kebbie import Corrector\n\n\nclass ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n\n    def auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n        cands = self.spellchecker.candidates(word)\n        return list(cands) if cands is not None else []\n

Great ! We have a testable Corrector class.

Info

We didn't overwrite the methods for the other tasks, and that's fine ! Other tasks' score will be set to 0, but we are just interested in auto-correction score anyway.

"},{"location":"usage/#calling-the-evaluate-function","title":"Calling the evaluate() function","text":"

Once we have the Corrector implemented, we can simply instantiate it and call the evaluate() function :

import json\nfrom typing import List\n\nfrom spellchecker import SpellChecker\nfrom kebbie import Corrector, evaluate\n\n\nclass ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n\n    def auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n        cands = self.spellchecker.candidates(word)\n        return list(cands) if cands is not None else []\n\n\nif __name__ == \"__main__\":\n    corrector = ExampleCorrector()\n    results = evaluate(corrector)\n\n    # Save the results in a local file for later inspection\n    with open(\"results.json\", \"w\") as f:\n        json.dump(results, f, ensure_ascii=False, indent=4)\n

And that's it !

Now you can just run your script. It might take some time to go over the 2 000 sentences of the test set, but eventually it will end and you should see a file results.json in your working directory.

"},{"location":"usage/#inspecting-the-results","title":"Inspecting the results","text":"

Go ahead and open the file results.json.

It contains the results of the test, with various metrics.

Results for pyspellchecker==0.8.1 at the time of writing
{\n    \"next_word_prediction\": {\n        \"score\": {\n            \"accuracy\": 0,\n            \"top3_accuracy\": 0,\n            \"n\": 46978\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 32044\n            },\n            \"dialogue\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 14934\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"865.0 KB\",\n            \"min_memory\": \"8.24 KB\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"5.91 \u03bcs\",\n            \"fastest_runtime\": \"0 ns\",\n            \"slowest_runtime\": \"2.13 ms\"\n        }\n    },\n    \"auto_completion\": {\n        \"score\": {\n            \"accuracy\": 0,\n            \"top3_accuracy\": 0,\n            \"n\": 46910\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 32002\n            },\n            \"dialogue\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 14908\n            }\n        },\n        \"per_completion_rate\": {\n            \"<25%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 1335\n            },\n            \"25%~50%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 8891\n            },\n            \"50%~75%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 25757\n            },\n            \">75%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 10927\n            }\n        },\n        \"per_other\": {\n            \"without_typo\": {\n                
\"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 43450\n            },\n            \"with_typo\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 3460\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"865.0 KB\",\n            \"min_memory\": \"424 B\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"9.57 \u03bcs\",\n            \"fastest_runtime\": \"0 ns\",\n            \"slowest_runtime\": \"89.8 ms\"\n        }\n    },\n    \"auto_correction\": {\n        \"score\": {\n            \"accuracy\": 0.87,\n            \"precision\": 0.47,\n            \"recall\": 0.35,\n            \"fscore\": 0.41,\n            \"top3_accuracy\": 0.88,\n            \"top3_precision\": 0.56,\n            \"top3_recall\": 0.5,\n            \"top3_fscore\": 0.53,\n            \"n_typo\": 6302,\n            \"n\": 48864\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0.87,\n                \"precision\": 0.48,\n                \"recall\": 0.36,\n                \"fscore\": 0.42,\n                \"top3_accuracy\": 0.89,\n                \"top3_precision\": 0.57,\n                \"top3_recall\": 0.51,\n                \"top3_fscore\": 0.54,\n                \"n_typo\": 4247,\n                \"n\": 32948\n            },\n            \"dialogue\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.44,\n                \"recall\": 0.34,\n                \"fscore\": 0.39,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.53,\n                \"top3_recall\": 0.48,\n                \"top3_fscore\": 0.51,\n                \"n_typo\": 2055,\n                \"n\": 15916\n            }\n        },\n        \"per_typo_type\": {\n            \"DELETE_SPELLING_SYMBOL\": {\n                \"accuracy\": 0.83,\n                \"precision\": 0.15,\n   
             \"recall\": 0.07,\n                \"fscore\": 0.099,\n                \"top3_accuracy\": 0.84,\n                \"top3_precision\": 0.26,\n                \"top3_recall\": 0.14,\n                \"top3_fscore\": 0.19,\n                \"n_typo\": 129,\n                \"n\": 1000\n            },\n            \"DELETE_SPACE\": {\n                \"accuracy\": 0.83,\n                \"precision\": 0.11,\n                \"recall\": 0.051,\n                \"fscore\": 0.074,\n                \"top3_accuracy\": 0.83,\n                \"top3_precision\": 0.11,\n                \"top3_recall\": 0.051,\n                \"top3_fscore\": 0.074,\n                \"n_typo\": 137,\n                \"n\": 1062\n            },\n            \"DELETE_PUNCTUATION\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"DELETE_CHAR\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.42,\n                \"recall\": 0.29,\n                \"fscore\": 0.35,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.55,\n                \"top3_recall\": 0.48,\n                \"top3_fscore\": 0.52,\n                \"n_typo\": 559,\n                \"n\": 4334\n            },\n            \"ADD_SPELLING_SYMBOL\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"ADD_SPACE\": {\n            
    \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"ADD_PUNCTUATION\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"ADD_CHAR\": {\n                \"accuracy\": 0.9,\n                \"precision\": 0.6,\n                \"recall\": 0.59,\n                \"fscore\": 0.59,\n                \"top3_accuracy\": 0.92,\n                \"top3_precision\": 0.66,\n                \"top3_recall\": 0.76,\n                \"top3_fscore\": 0.7,\n                \"n_typo\": 855,\n                \"n\": 6629\n            },\n            \"SUBSTITUTE_CHAR\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.47,\n                \"recall\": 0.35,\n                \"fscore\": 0.4,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.55,\n                \"top3_recall\": 0.49,\n                \"top3_fscore\": 0.53,\n                \"n_typo\": 863,\n                \"n\": 6691\n            },\n            \"SIMPLIFY_ACCENT\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            
\"SIMPLIFY_CASE\": {\n                \"accuracy\": 0.82,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0.82,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 403,\n                \"n\": 3125\n            },\n            \"TRANSPOSE_CHAR\": {\n                \"accuracy\": 0.89,\n                \"precision\": 0.58,\n                \"recall\": 0.54,\n                \"fscore\": 0.56,\n                \"top3_accuracy\": 0.91,\n                \"top3_precision\": 0.64,\n                \"top3_recall\": 0.7,\n                \"top3_fscore\": 0.66,\n                \"n_typo\": 1313,\n                \"n\": 10181\n            },\n            \"COMMON_TYPO\": {\n                \"accuracy\": 0.85,\n                \"precision\": 0.39,\n                \"recall\": 0.26,\n                \"fscore\": 0.32,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.53,\n                \"top3_recall\": 0.45,\n                \"top3_fscore\": 0.49,\n                \"n_typo\": 1725,\n                \"n\": 13375\n            }\n        },\n        \"per_number_of_typos\": {\n            \"1\": {\n                \"accuracy\": 0.87,\n                \"precision\": 0.47,\n                \"recall\": 0.36,\n                \"fscore\": 0.41,\n                \"top3_accuracy\": 0.89,\n                \"top3_precision\": 0.56,\n                \"top3_recall\": 0.51,\n                \"top3_fscore\": 0.54,\n                \"n_typo\": 5984,\n                \"n\": 46397\n            },\n            \"2\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.43,\n                \"recall\": 0.29,\n                \"fscore\": 0.35,\n                \"top3_accuracy\": 0.87,\n                \"top3_precision\": 0.47,\n                \"top3_recall\": 0.36,\n       
         \"top3_fscore\": 0.41,\n                \"n_typo\": 292,\n                \"n\": 2264\n            },\n            \"3+\": {\n                \"accuracy\": 0.83,\n                \"precision\": 0.17,\n                \"recall\": 0.077,\n                \"fscore\": 0.11,\n                \"top3_accuracy\": 0.84,\n                \"top3_precision\": 0.23,\n                \"top3_recall\": 0.12,\n                \"top3_fscore\": 0.16,\n                \"n_typo\": 26,\n                \"n\": 202\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"866.0 KB\",\n            \"min_memory\": \"7.05 KB\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"358.0 ms\",\n            \"fastest_runtime\": \"69.1 \u03bcs\",\n            \"slowest_runtime\": \"77.1 s\"\n        }\n    },\n    \"swipe_resolution\": {\n        \"score\": {\n            \"accuracy\": 0,\n            \"top3_accuracy\": 0,\n            \"n\": 417\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 313\n            },\n            \"dialogue\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 104\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"860.0 KB\",\n            \"min_memory\": \"96.0 KB\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"24.5 \u03bcs\",\n            \"fastest_runtime\": \"0 ns\",\n            \"slowest_runtime\": \"4.68 ms\"\n        }\n    },\n    \"overall_score\": 0.164\n}\n

Let's go over the content quickly.

First, the metrics are divided into each tasks :

  • next_word_prediction
  • auto_completion
  • auto_correction
  • swipe_resolution

Info

At the end of the file, there is also a field overall_score. This is just an aggregation of the scores of all tasks, to have an easy way to compare runs.

As expected, if you look at other tasks than auto_correction, their score is zero. That's expected, because we are interested only on auto-correction, and we didn't implement the code for the other tasks.

Let's take a deeper look at the auto_correction results.

First, we have a score field, which contains various overall metrics about the auto-correction capability : precision, recall, F-score, etc...

There is also a value n, which shows the total number of words we tried to auto-correct, and n_typo, the number of words which contained a typo.

For auto-correction, the metric we care about is the F-score, as it measure both the precision and the recall.

Info

For more information about the metrics and their meaning, check out the Metrics page.

Then we have a per_domain field, which also contains the same metrics, but divided into the various domains of our dataset. We can see that pyspellchecker is better at correcting narrative data than dialogue data, since the F-score is higher.

We then have a per_typo_type field, which shows the metrics for each type of typo introduced. Note that the evaluate() does not introduce all type of typos by default, so some of them are set to 0.

After we have a per_number_of_typos field, which gives the metrics depending on how many typos were introduced in that word.

And finally we have a field performances, which show the memory consumption and runtime for the auto_correct() method that we wrote.

"},{"location":"usage/#a-note-about-multiprocessing","title":"A note about multiprocessing","text":"

Under the hood, evaluate() uses multiprocessing to run faster.

It means that your Corrector should be pickable !

Example

In the example above, the implementation provided is already pickable, so there is nothing to do.

If you need to make your class pickable, just implement the __reduce__() magic method, like this :

from typing import Tuple\n\nfrom kebbie import Corrector\n\n\nclass GreatCorrector(Corrector):\n    def __init__(self, model_path: str):\n        self.m_path = model_path\n\n        # Because of this (imaginary) non-pickable attribute,\n        # the class `GreatCorrector` is not pickable as-is\n        self.non_pickable_model = load_model(model_path)\n\n    def __reduce__(self) -> Tuple:\n        # But by implementing `__reduce__()`, we can make it pickable !\n        return (GreatCorrector, (self.m_path,))\n
"},{"location":"usage/#advanced-usage","title":"Advanced usage","text":""},{"location":"usage/#leveraging-the-keystroke-coordinates","title":"Leveraging the keystroke coordinates","text":"

Did you notice that in our auto_correct() implementation, there is an argument keystrokes that we didn't use ?

class ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n\n    def auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n        cands = self.spellchecker.candidates(word)\n        return list(cands) if cands is not None else []\n

This keystrokes argument is a list of keystrokes coordinates (one per character of the typed word).

These coordinates may hold useful information : for example on a QWERTY keyboard, if the word typed is lovw but the keystroke for w is very close to the border of the e key... There is a great chance that the word should be auto-corrected to love...

These coordinates are defined in a layout file internally. To interact easily with the layout, you can use the LayoutHelper class.

You can use the method get_key_info() to retrieve data about the key for the given character.

For example, let's compute the distance between the first keystroke of the word, and the key for the character w :

import math\nfrom kebbie.layout import LayoutHelper\n\nlayout = LayoutHelper()\n\ndef auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n    _, _, w_key_center_x, w_key_center_y, _ = layout.get_key_info(\"w\")\n    if len(keystrokes) > 0 and keystrokes[0] is not None:\n        print(math.dist(keystrokes[0], [w_key_center_x, w_key_center_y]))\n
"},{"location":"usage/#custom-dataset","title":"Custom dataset","text":"

The evaluate() function uses a good default dataset (see Test data) to run the evaluation.

However, you might want to run the evaluation on your own dataset.

You can do this by passing your custom dataset to the evaluate() function :

my_dataset = load_my_private_dataset()\ncorrector = ExampleCorrector()\nresults = evaluate(corrector, dataset=my_dataset)\n

Your custom dataset should be a Dict[str, List[str]], where each keys of the dictionary represents a specific domain, and the values are just the list of sentences.

"},{"location":"usage/#get-insights-on-most-common-mistakes","title":"Get insights on most common mistakes","text":"

When trying to improve your models, you might want to take a look at the most common mistakes your model is doing.

You can achieve this simply by passing track_mistakes=True to the evaluate() function :

corrector = ExampleCorrector()\nresults = evaluate(corrector, track_mistakes=True)\n

It will record the most common mistakes your Corrector is doing, and add them in a new field (most_common_mistakes) in the returned results.

The mistakes are tracked for the following tasks : next-word prediction, auto-completion, and auto-correction.

Let's look at the most common mistakes for our example with pyspellchecker :

\"auto_correction\": [\n    [\n        \"Count\",\n        \"Expected\",\n        \"Predictions\",\n        \"Context\"\n    ],\n    [\n        266,\n        \"I'm\",\n        \"[ism, h'm]\",\n        \"Kolten beckoned Aida over wanting to hear what he had to say Aida I want to know what's on your mind Kolten said I'm\"\n    ],\n    [\n        157,\n        \"to\",\n        \"[tho]\",\n        \"Destanie was so angry that he felt like he might explode He felt the hot blood rushing to his head and his fists clenched tightly at his sides He took a deep breath and tried tho\"\n    ],\n    ...\n

Here we can see that we track several thing for each mistake :

  • Count : The total number of times this mistake happened
  • Expected : The expected word
  • Predictions : The model's predictions
  • Context : An example of a sentence where the mistake happened

So we can see that the most common mistake of pyspellchecker is to try to auto correct I'm into ism, even though it should not be corrected. This mistake was encountered 266 times during the evaluation.

The second most common mistake is to not auto-correct tho, even though it should be corrected to to. This mistake was encountered 157 times during the evaluation.

Tip

By default, the 1 000 most common mistakes will be saved. You can specify a different n, with the n_most_common_mistakes argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, track_mistakes=True, n_most_common_mistakes=150)\n
"},{"location":"usage/#other-arguments","title":"Other arguments","text":"

Specify the number of processes to be used for multiprocessing with the n_proc argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, n_proc=4)\n

Note

If None is given, evaluate() will use os.cpu_count() (the number of CPU of your machine). Defaults to None.

Specify a different seed with the seed argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, seed=36)\n

Specify a different Beta for the F-score calculation (see the Metrics section) with the beta argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, beta=1.2)\n
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Kebbie","text":""},{"location":"#introduction","title":"Introduction","text":"

Welcome to the documentation of the kebbie package.

kebbie is a small framework for testing and benchmarking mobile keyboards. The primary goal of this package is to establish a cohesive and standardized method for evaluating the various NLP capabilities of a mobile keyboard and comparing them to existing alternatives.

This is achieved through two features offered by kebbie :

  • An easy-to-use evaluation function that facilitates the testing of multiple NLP functionalities offered by a mobile keyboard : auto-correction, auto-completion, next-word prediction, and swipe gesture recognition.
  • A command-line interface for running the evaluation on established keyboards, operated within emulator.
"},{"location":"#installation","title":"Installation","text":""},{"location":"#latest-version","title":"Latest version","text":"

You can install the latest version of the package directly from PyPi with :

pip install kebbie\n

Hint

If you want to install directly from Github, run :

pip install git+https://github.com/FleksySDK/kebbie.git\n

"},{"location":"#specific-version","title":"Specific version","text":"

You can install a specific version of the package (0.1.0 in this example) from PyPi with :

pip install kebbie==0.1.0\n

Hint

If you want to install directly from Github, run :

pip install git+https://github.com/FleksySDK/kebbie.git@v0.1.0\n

"},{"location":"#local","title":"Local","text":"

You can also clone the repository locally and install it manually :

git clone https://github.com/FleksySDK/kebbie.git\ncd kebbie\npip install -e .\n
"},{"location":"#extra-dependencies","title":"Extra dependencies","text":"

You can also install extras dependencies, for example :

pip install -e .[docs]\n

Will install necessary dependencies for building the docs.

Hint

If you installed the package directly from github, run :

pip install \"kebbie[docs] @ git+https://github.com/FleksySDK/kebbie.git\"\n

List of extra dependencies :

  • test : Dependencies for running unit-tests.
  • hook : Dependencies for running pre-commit hooks.
  • lint : Dependencies for running linters and formatters.
  • docs : Dependencies for building the documentation.
  • dev : test + hook + lint + docs.
  • all : All extra dependencies.
"},{"location":"#contribute","title":"Contribute","text":"

To contribute, install the package locally (see Installation), create your own branch, add your code (and tests, and documentation), and open a PR !

"},{"location":"#pre-commit-hooks","title":"Pre-commit hooks","text":"

Pre-commit hooks are set to check the code added whenever you commit something.

When you try to commit your code, hooks are automatically run, and if your code does not meet the quality required by linters, it will not be committed. You then have to fix your code and try to commit again !

Important

If you never ran the hooks before, install it with :

pip install -e .[hook]\npre-commit install\n

Info

You can manually run the pre-commit hooks with :

pre-commit run --all-files\n

"},{"location":"#unit-tests","title":"Unit-tests","text":"

When you contribute, you need to make sure all the unit-tests pass. You should also add tests if necessary !

Info

Install the dependencies for testing with :

pip install -e .[test]\n

You can run the tests with :

pytest\n

Info

Tests are not included in the pre-commit hooks, because running the tests might be slow, and for the sake of developers we want the pre-commit hooks to be fast !

Info

Pre-commit hooks will not run the tests, but it will automatically update the coverage badge !

"},{"location":"#documentation","title":"Documentation","text":"

When you contribute, make sure to keep the documentation up-to-date.

You can visualize the documentation locally by running :

mkdocs serve\n

Info

Before running this command, you need to install the documentation dependencies :

pip install -e .[docs]\n

"},{"location":"architecture/","title":"Architecture","text":"

This page presents the internals and design decisions of the kebbie package.

"},{"location":"architecture/#the-oracle","title":"The Oracle","text":"

The Oracle is the main class of the package.

It's the class that takes care of iterating the dataset, introducing the artificial typos, and calling the given Corrector with the noisy text. Then it scores the results, knowing what was the expected text, and returns the aggregated metrics as a result.

Performances

The task is embarrassingly parallel. Each sentence can be tested separately. The Oracle leverages multiprocessing to ensure we run the tests as fast as possible.

Reproducibility

Although The Oracle runs in parallel, the evaluation is entirely reproducible and deterministic. Running twice the same evaluation (with the same Corrector and the same parameters) should give you the exact same results.

If you follow the flow of the data, this is what it looks like :

"},{"location":"architecture/#the-noise-model","title":"The Noise Model","text":"

The NoiseModel is the class responsible for introducing artificial typos in a clean text.

This is done in two steps :

  • From a clean word, create a noisy equivalent, which corresponds to a \"cognitive\" typo (i.e. the user might not know the exact spelling of the word)
  • Then from this noisy word, we type each character one by one in a fuzzy way (might type the character next to the intended character), which corresponds to a \"physical\" typo (i.e. fat finger syndrome)

Info

The keystrokes are generated by using two Gaussian distributions (over the X-axis and the Y-axis), centered on the middle of the intended key.

In the end, the output is a noisy version of the word, alongside with the corresponding keystrokes coordinates.

"},{"location":"emu_setup/","title":"Emulator setup","text":""},{"location":"emu_setup/#installing-appium-20","title":"Installing Appium 2.0","text":"

Appium is required to communicate between Python and the emulators.

Install Appium 2.0 by following their official documentation.

Then install the required drivers :

# For Android\nappium driver install uiautomator2\n\n# For iOS\nappium driver install xcuitest\n

To start Appium, open a new terminal and type :

appium\n

Note

Once it's running, don't close the terminal. Appium needs to run in order for Python to communicate with the emulators.

"},{"location":"emu_setup/#setting-up-android-emulator","title":"Setting up Android emulator","text":""},{"location":"emu_setup/#creating-the-emulator","title":"Creating the emulator","text":"
  • Install Android Studio
  • Create a new virtual device
  • Select the phone (Pixel 2 for example) and the system image (Tiramisu - Android 13.0 for example)
"},{"location":"emu_setup/#starting-the-emulator","title":"Starting the emulator","text":"

Once you have created the emulator, you should be able to see its name from the command line :

emulator -list-avds\n
If you encounter command not found: emulator

If the command fails with command not found: emulator, you need to update your path accordingly :

export ANDROID_HOME=/Users/<username>/Library/Android/sdk\nexport PATH=$ANDROID_HOME/platform-tools:$ANDROID_HOME/emulator:$PATH\n

You can start the emulator directly from the command line with : (so you don't need to run Android Studio, which takes a lot of resources)

emulator -avd <name> -no-snapshot-load\n

Once started, make sure you can see it. From another terminal, run :

adb devices\n
If you encounter command not found: adb

If the command fails with command not found: adb, you need to update your path accordingly :

export ANDROID_HOME=/Users/<username>/Library/Android/sdk\nexport PATH=$ANDROID_HOME/platform-tools:$ANDROID_HOME/emulator:$PATH\n

Info

In Android, to open the keyboard, we access a notepad website (www.justnotepad.com).

The reason we do that is because it's the easiest way to access a typing field, and it works across versions and emulators.

"},{"location":"emu_setup/#preparing-gboard","title":"Preparing GBoard","text":"

GBoard is enabled by default on Android, so there is nothing to do.

Tip

You can make sure GBoard is indeed the selected keyboard by going to the Settings -> System -> Languages & Input -> On-screen keyboard.

By default, GBoard has the clipboard enabled, and it may interfere with the layout detection. You can disable the clipboard in the settings of GBoard :

Make sure to disable the clipboard :

Layout

For now, the only layout supported is english US. Make sure this is the layout GBoard is using.

"},{"location":"emu_setup/#preparing-swiftkey","title":"Preparing Swiftkey","text":"

Swiftkey keyboard isn't installed on the emulator by default : you need to install it first.

Note

If you want to run the tests in parallel on several emulators, you need to repeat these steps for each emulator.

Start the emulator, then go to Google, and paste this link to install Swiftkey.

Tip

If the clipboard isn't shared with the emulator, open a terminal and run :

adb shell input text \"https://play.google.com/store/apps/details?id=com.touchtype.swiftkey&hl=en_US&gl=US\"\n

Install the keyboard on your emulator :

Open the app, follow the instructions to activate the keyboard.

By default, Swiftkey has the clipboard enabled, and it may interfere with the layout detection. You can disable the clipboard. First, access the clipboard settings :

And disable the clipboard suggestions :

"},{"location":"emu_setup/#setting-up-ios-emulator","title":"Setting up iOS emulator","text":""},{"location":"emu_setup/#creating-the-emulator_1","title":"Creating the emulator","text":"
  • Install XCode
  • Open WebDriverAgent in Xcode :
    open ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent/WebDriverAgent.xcodeproj\n
  • Go to Signing & Capabilities of the project :
  • Then click \"Team\" and select your Apple ID
  • You should do this for the three following targets : WebDriverAgentLib, WebDriverAgentRunner, IntegrationApp.

Now, make sure you can properly build the WebDriverAgentRunner target : select it in the top bar and run it (button \"play\") :

If all the stars are aligned, it should start the emulator !

"},{"location":"emu_setup/#starting-the-emulator_1","title":"Starting the emulator","text":"

Once you have ensured the emulator runs properly, you should be able to start it from the command line (without Xcode open).

First, check the list of emulators available :

xcrun simctl list\n

Example of emulators listed :

-- iOS 17.4 --\n    iPhone SE (3rd generation) (96ADAD77-ECE6-420E-B56C-505E0C16231B) (Shutdown)\n    iPhone 15 (128F95FC-F499-4B09-A3B2-55937BF52B0B) (Shutdown)\n    iPhone 15 Plus (86591FC6-B3E7-43A2-9E9B-D4A2A90DAF31) (Shutdown)\n    iPhone 15 Pro (9D38F87D-273B-4D8F-8AD5-E901C1974C1E) (Shutdown)\n    iPhone 15 Pro Max (15EF57B4-69E6-4369-9534-70692A2023E5) (Shutdown)\n    iPad Air (5th generation) (252D522B-CEAA-4085-BE17-A453BC219755) (Shutdown)\n    iPad (10th generation) (39F2ADD2-2FCF-44C3-9DC9-4CC4D50875E9) (Shutdown)\n    iPad mini (6th generation) (59125B84-4ED1-40C1-8457-3CE824394385) (Shutdown)\n    iPad Pro (11-inch) (4th generation) (DB122D71-F358-48DA-B11C-D25305657E7F) (Shutdown)\n    iPad Pro (12.9-inch) (6th generation) (1100927A-B631-4678-AB19-02EA4F680537) (Shutdown)\n

Then you can start the device you want with :

xcrun simctl boot <UUID>\n

For example, to start iPhone 15 Pro, you should run :

xcrun simctl boot 9D38F87D-273B-4D8F-8AD5-E901C1974C1E\n

Warning

The xcrun simctl boot command only launches the simulator background service; to launch the foreground GUI, run :

open -a Simulator\n

Note

To shutdown the simulator, run :

xcrun simctl shutdown <UUID>\n
"},{"location":"emu_setup/#preparing-ios-keyboard","title":"Preparing iOS Keyboard","text":"

iOS Keyboard is the default keyboard on iOS, so there is nothing to do to enable it.

However, predictions and auto-corrections are disabled by default. They should be enabled :

  • Go to \"Settings\" :

  • Then go to \"General\" :

  • Then go to \"Keyboard\" :

  • Then enable \"Auto-Correction\" and \"Predictive Text\" :

Also, inline predictions are enabled by default, and it may interfere with kebbie. Make sure to turn it off (also in the keyboard settings) :

Layout

For now, the only layout supported is english US. Make sure this is the layout iOS keyboard is using.

"},{"location":"emu_setup/#preparing-fleksy-keyboard","title":"Preparing Fleksy keyboard","text":"

Fleksy is a fully-featured keyboard SDK. A demo keyboard is provided, allowing anyone to test its performance.

You first need to install the keyboard in your simulator. To do this, start your simulator (see Starting the emulator), and then run :

wget https://github.com/FleksySDK/kebbie/files/15290354/Fleksy.zip\nunzip Fleksy.zip\nxcrun simctl install <UUID> Fleksy.app\n

Tip

You can find the UUID of your simulator by running : xcrun simctl list and finding which one is Booted.

Once the app is installed, start it :

Click \"Add Keyboard\" :

Then go to \"General\" :

Then go to \"Keyboard\" :

Then go to \"Keyboards\" :

Then click \"Add New Keyboard\" :

And select \"Fleksy For Research\" :

Then select the Fleksy keyboard you just installed :

And enable \"Full Access\" :

Once enabled, you still need to select the right keyboard ! Open the keyboard using any text field, and hold the switch keyboard key. You can then select the keyboard you want to test :

And similarly to the default iOS keyboard, you should enable predictions and auto-corrections :

  • Go to \"Settings\" :

  • Then go to \"General\" :

  • Then go to \"Keyboard\" :

  • Then enable \"Auto-Correction\" and \"Predictive Text\" :

"},{"location":"emu_setup/#preparing-keyboardkit","title":"Preparing KeyboardKit","text":"

KeyboardKit is an open-source SDK that lets you create a custom keyboard. They provide a demo keyboard that we can use to test its performance.

Before being able to run kebbie evaluate to benchmark KeyboardKit, you need to install the demo keyboard on your simulator.

First, clone the repository and open the project in Xcode :

git clone https://github.com/KeyboardKit/KeyboardKit.git\ncd KeyboardKit\nopen Demo/Demo.xcodeproj\n

Then, from Xcode, select the Demo project, select the right simulator, and press the play button :

It should start the simulator, with KeyboardKit installed.

Once the simulator started, you need to enable the KeyboardKit keyboard and allow full access :

Once enabled, you still need to select the right keyboard ! Open the keyboard using any text field, and hold the switch keyboard key. You can then select the keyboard you want to test :

And similarly to the default iOS keyboard, you should enable predictions and auto-corrections :

  • Go to \"Settings\" :

  • Then go to \"General\" :

  • Then go to \"Keyboard\" :

  • Then enable \"Auto-Correction\" and \"Predictive Text\" :

"},{"location":"emu_setup/#parallel-emulators","title":"Parallel emulators","text":"

In order to run tests faster, we can setup multiple emulators, and run the evaluate() function in parallel. Let's see how to set up multiple emulators for both Android and iOS.

"},{"location":"emu_setup/#android","title":"Android","text":"

First, follow the section above to setup one Android emulator.

Once it's done, you can simply clone it from Android Studio :

Clone it several times. Once the emulators are created, you should be able to list them from the command line :

emulator -list-avds\n

Then open several terminal, and in each terminal open one emulator :

emulator -avd <name> -no-snapshot-load\n

After they started, you should be able to see them with :

adb devices\n

Tip

Once you can see the emulators with the adb devices command, there is nothing else to do ! You can run the kebbie CLI just like you would do for a single emulator : the CLI will detect the running emulators with the adb devices command.

"},{"location":"emu_setup/#ios","title":"iOS","text":"

First, follow the section above to setup one iOS simulator and make sure everything works for a single device.

Once it's done, you can list the available devices :

xcrun simctl list\n

Example of emulators listed :

-- iOS 17.4 --\n    iPhone SE (3rd generation) (96ADAD77-ECE6-420E-B56C-505E0C16231B) (Shutdown)\n    iPhone 15 (128F95FC-F499-4B09-A3B2-55937BF52B0B) (Shutdown)\n    iPhone 15 Plus (86591FC6-B3E7-43A2-9E9B-D4A2A90DAF31) (Shutdown)\n    iPhone 15 Pro (9D38F87D-273B-4D8F-8AD5-E901C1974C1E) (Shutdown)\n    iPhone 15 Pro Max (15EF57B4-69E6-4369-9534-70692A2023E5) (Shutdown)\n    iPad Air (5th generation) (252D522B-CEAA-4085-BE17-A453BC219755) (Shutdown)\n    iPad (10th generation) (39F2ADD2-2FCF-44C3-9DC9-4CC4D50875E9) (Shutdown)\n    iPad mini (6th generation) (59125B84-4ED1-40C1-8457-3CE824394385) (Shutdown)\n    iPad Pro (11-inch) (4th generation) (DB122D71-F358-48DA-B11C-D25305657E7F) (Shutdown)\n    iPad Pro (12.9-inch) (6th generation) (1100927A-B631-4678-AB19-02EA4F680537) (Shutdown)\n

Select the UUID of the device you would like to run in parallel, and clone it with :

xcrun simctl clone <UUID> <new_name>\n

So for example, to have 4 parallel iPhone 15 Pro, you should run :

xcrun simctl clone 9D38F87D-273B-4D8F-8AD5-E901C1974C1E iPhone_15_2\nxcrun simctl clone 9D38F87D-273B-4D8F-8AD5-E901C1974C1E iPhone_15_3\nxcrun simctl clone 9D38F87D-273B-4D8F-8AD5-E901C1974C1E iPhone_15_4\n

Once this is done, you should see them listed when running :

xcrun simctl list\n
-- iOS 17.4 --\n    iPhone SE (3rd generation) (96ADAD77-ECE6-420E-B56C-505E0C16231B) (Shutdown)\n    iPhone 15 (128F95FC-F499-4B09-A3B2-55937BF52B0B) (Shutdown)\n    iPhone 15 Plus (86591FC6-B3E7-43A2-9E9B-D4A2A90DAF31) (Shutdown)\n    iPhone 15 Pro (9D38F87D-273B-4D8F-8AD5-E901C1974C1E) (Booted)\n    iPhone_15_2 (C423F3BC-BC3A-4FFC-B264-C6075B60115F) (Shutdown)\n    iPhone_15_3 (2BEB33D0-8F33-4987-95FC-FD9B7C2BD54D) (Shutdown)\n    iPhone_15_4 (EE0719E9-FF3C-4539-9BCD-9F091B469F93) (Shutdown)\n    iPhone 15 Pro Max (15EF57B4-69E6-4369-9534-70692A2023E5) (Shutdown)\n    iPad Air (5th generation) (252D522B-CEAA-4085-BE17-A453BC219755) (Shutdown)\n    iPad (10th generation) (39F2ADD2-2FCF-44C3-9DC9-4CC4D50875E9) (Shutdown)\n    iPad mini (6th generation) (59125B84-4ED1-40C1-8457-3CE824394385) (Shutdown)\n    iPad Pro (11-inch) (4th generation) (DB122D71-F358-48DA-B11C-D25305657E7F) (Shutdown)\n    iPad Pro (12.9-inch) (6th generation) (1100927A-B631-4678-AB19-02EA4F680537) (Shutdown)\n

Then you can start each simulator with :

xcrun simctl boot <UUID>\n

For example, to start the 4 simulators we just created, you would run :

xcrun simctl boot 9D38F87D-273B-4D8F-8AD5-E901C1974C1E\nxcrun simctl boot C423F3BC-BC3A-4FFC-B264-C6075B60115F\nxcrun simctl boot 2BEB33D0-8F33-4987-95FC-FD9B7C2BD54D\nxcrun simctl boot EE0719E9-FF3C-4539-9BCD-9F091B469F93\n

Tip

Once the simulators started, there is nothing else to do ! You can run the kebbie CLI just like you would do for a single emulator : the CLI will automatically detect the running emulators with the xcrun simctl list command.

However, make sure to enable auto-correction and predictive suggestions in each of the simulator (see Preparing the iOS Keyboard for more information)

Warning

The xcrun simctl boot command only launches the simulator background service; to launch the foreground GUI, run :

open -a Simulator\n

Note

To shutdown a simulator, run :

xcrun simctl shutdown <UUID>\n
"},{"location":"emulated_keyboard/","title":"Emulated keyboards","text":"

In Usage, we saw how to use the kebbie framework to test our code and get various metrics to understand how good our custom auto-correction was.

Now, let's see how to use the kebbie CLI to run similar tests on an existing keyboard (within an emulator) such as GBoard.

"},{"location":"emulated_keyboard/#setup","title":"Setup","text":"

First, you need to install and setup Appium and the emulators.

Follow the instructions in Emulator setup.

Once everything you need is installed, you should have the following running :

  • Appium in a terminal
  • At least one emulator
"},{"location":"emulated_keyboard/#layout-detection","title":"Layout detection","text":"

kebbie tries to automatically detect the layout of the keyboard in use. It is working for GBoard or iOS keyboard for example.

But some keyboards cannot be detected automatically. In this case we rely on a manual definition of the layout.

But these manual definitions of the layout may not fit all devices.

"},{"location":"emulated_keyboard/#showing-the-layout","title":"Showing the layout","text":"

kebbie provides a CLI to check the layout. To visualize the keyboard's layout, run the show_layout command. For example for GBoard :

kebbie show_layout -K gboard\n

It will display 3 images (one for each layer of the keyboard : lowercase, uppercase, numbers), so you can see if the layout (automatically detected or manually defined) fits the current keyboard. You can leave the images by pressing any key.

Info

Before leaving, the command will also display in the terminal the detected suggestions of the keyboard. If they don't correspond to what's displayed in the emulator, something might be wrong !

For auto-detected keyboards, these suggestions are retrieved directly from the XML tree (fast and accurate). For keyboards with manual layout, we use OCR to find the suggestions (slow and may be wrong).

Tip

If you have several emulators running, the show_layout command will find and display the layout for each emulator, one by one.

Example where the layout match the keys properly :

Example where the layout doesn't match the keyboard's keys :

If it doesn't match...

You need to modify the definition of the layout (in emulator.py), and experiment with new coordinates until it matches well...

"},{"location":"emulated_keyboard/#list-of-supported-keyboards","title":"List of supported keyboards","text":"

Here is the list of keyboards for which the layout auto-detection is supported :

  • GBoard, with the -K gboard argument
  • iOS keyboard, with the -K ios argument
  • KeyboardKit Pro, with the -K kbkitpro argument
  • KeyboardKit Open-source, with the -K kbkitoss argument
  • Tappa keyboard, with the -K tappa argument
"},{"location":"emulated_keyboard/#testing-the-keyboard","title":"Testing the keyboard","text":"

After you made sure the layout is properly detected / defined, it's time to run the tests !

Simply run :

# For GBoard on Android emulator\nkebbie evaluate -K gboard --all_tasks\n\n# For iOS keyboard on iOS emulator\nkebbie evaluate -K ios --all_tasks\n

After a while, you should see the emulator start typing sentences !

The command line will type the sentences from the test data, and record the suggestions and the auto-corrections from the keyboard.

Once all sentences are tested, the results will be saved in a file results.json.

Info

The evaluate CLI will use only 100 sentences of the test data (versus 2 000 by default for the evaluate() function, see Usage).

This is because typing on an emulated keyboard is significantly slower. 100 sentences is enough to get some good, comparable metrics.

Note that we specified the option --all_tasks. With this option, we are computing the results for all of the tasks supported by the emulator : auto-correction, auto-completion, and next-word prediction.

Unsupported

For now, swipe gesture recognition is not supported for the emulated keyboards.

The default behavior (when --all_tasks is not specified) is to run only the auto-correction task. It is significantly faster, specially for keyboards with a layout defined manually, because they require OCR, which is quite slow.

If you want to change the number of sentences the CLI run on, just use the option --n_sentences :

kebbie evaluate -K gboard --all_tasks --n_sentences 10\n

You can change the destination file for the results with the option --result_file :

kebbie evaluate -K gboard --all_tasks --result_file my/folder/evaluation_results.json\n

You can track the most common mistakes with the option --track_mistakes :

kebbie evaluate -K gboard --all_tasks --track_mistakes\n

It will save the most common mistakes in the result file.

"},{"location":"how_testing_is_done/","title":"How testing is done ?","text":"

The basic idea is simple : we take a dataset of english sentences, we corrupt these sentences by introducing artificially generated typos, and then we measure how these typos are corrected.

"},{"location":"how_testing_is_done/#artificial-typos","title":"Artificial typos","text":"

To introduce typos in the clean text, we simulate all possible typos that a human typing on a mobile keyboard could do. This includes :

  • Characters additions / deletions
  • Characters transpositions
  • Accent simplifications
  • Case simplifications
  • Fat-finger syndrome (fuzzy typing)
  • Common typos (sampled from a dataset of most common typos)

We use the following typo rates :

  • Character transpositions : 1% of all characters
  • Character additions : 0.5% of all characters
  • Character deletions : 0.5% of all characters
  • Space deletions : 1% of all space characters
  • Symbol deletions : 10% of symbol characters
  • Accent simplification : 8% of accented characters
  • Case simplification : 8% of uppercased characters
  • Common typos : 5% of words

With these rates, we obtain an overall typo rate of 12%.

Sources

These rates come from studies on real-human typing habits : Reference #1, Reference #2.

Particularly, Reference #1 (which focus on mobile device typing) shows that typing on mobile devices leads to 2.3% of uncorrected errors (see introduction), and 8% of words autocorrected (see Intelligent text entry, page 8), for an overall typo rate of 10.3%.

Details

Additionally to these typo rates, we further modify the probabilities :

  • FRONT_DELETION_MULTIPLIER is used to reduce the probability of a deletion happening on the first character of the word. This number was computed after analyzing the Twitter typo corpus (see this script)

Here is a few examples of sentences before and after introducing typos :

Clean sentence Corrupted sentence Typos introduced He went hiking and said he'd think about it; never came back. He went hikimg and said hed think about it; never came back. Fuzzy typing & Symbol deletion Like, what you're doing here and what all this stuff is. Like, what you're doinghere and waht all this stuff is. Space deletion & Character transposition You must do something about yourself. You must do something about yourself. That's the way to get rid of pests like that. That's the waj to get rid of pedts like thhat. Common typo & Fuzzy typing & Character addition He obviously wanted an ally. he obviously wanted an ally. Case simplification This is all we got between us and the Almighty! This is lal we got beween us and the Almgihty! 2 x Character transposition & Character deletion"},{"location":"how_testing_is_done/#swipe-gesture-generation","title":"Swipe gesture generation","text":"

For the task of swipe gesture resolution, the input is not simple text : we need to generate a swipe gesture.

When generating fuzzy typing typo, we sample key taps positions on the keyboard, using Gaussian distributions, and use these key taps position to see if the correct character was typed, or if a neighbor key was typed.

For generating the swipe gesture, we sample some key taps positions just like we do for fuzzy typing, and then link the different keystrokes of the word using bezier curves. Some randomness on the speed & acceleration between points is added, in order to generate more natural swipe gestures.

Here is some examples of the generated swipe gestures (in red are the keystrokes generated by the fuzzy typing, in blue the points of the corresponding swipe gesture created).

For the word gives :

For the word they :

"},{"location":"how_testing_is_done/#data","title":"Data","text":""},{"location":"how_testing_is_done/#test-data","title":"Test data","text":"

For the data, we use the test set of the SODA dataset.

We chose to use this dataset for the evaluation for several reasons :

  • Recent
  • Extremely clean dataset
  • Cover two very distinct domains (narrative & dialogue)
"},{"location":"how_testing_is_done/#common-typos-dataset","title":"Common typos dataset","text":"

As mentioned in the section Artificial typos, we rely on a dataset of common typos, and use these common typos when generating plausible typos.

The dataset of common typos that we use is the Twitter Typo Corpus.

"},{"location":"how_testing_is_done/#tasks","title":"Tasks","text":"

We test the most important NLP features of a mobile keyboard. These are :

  • Auto-correction: Corrects the words typed by the user. For example, if a user types I\u2019m especialy touched, the typo should be detected and corrected to I\u2019m especially touched.
  • Auto-completion: Completes the word typed by the user. For example, if a user types I love y, the word should be auto-completed to I love you.
  • Next-word prediction: Predicts the next word to be typed. For example, if a user types I want to eat french, a probable next word can be fries.
  • Swipe gesture resolution: Predicts the intended word from a swipe gesture.
"},{"location":"how_testing_is_done/#metrics","title":"Metrics","text":"

If you look into the results from kebbie, for each task we have a handful of metrics that help us understand how good the tested keyboard is. Let's look at the details of these metrics.

"},{"location":"how_testing_is_done/#formulas","title":"Formulas","text":""},{"location":"how_testing_is_done/#next-word-prediction-swipe-resolution-auto-completion","title":"Next-word prediction, swipe resolution, auto-completion","text":"

For these three tasks, the metric used is Accuracy.

The formula is : accuracy = correct / total

Where correct is the number of correct predictions, and total the total number of predictions.

For the next-word prediction task and auto-completion task, we use top-3 accuracy as the main reference metric. It\u2019s the same as accuracy, but instead of considering only one candidate (which is either correct or not), we consider the 3 most probable candidates (if any one of these 3 candidates is correct).

The reason for this is that the next-word predictions and auto-completion predictions are not \u201cforced\u201d upon the user : 3 predictions are displayed at the top of the keyboard, and the user can choose any of the predictions displayed. So the correct prediction should appear among these 3 predictions displayed.

For swipe resolution however, only the best prediction is selected and applied. So we use accuracy as the main reference metric (and not top-3 accuracy).

"},{"location":"how_testing_is_done/#auto-correction","title":"Auto-correction","text":"

For auto-correction, it\u2019s different. We have a notion of true/false positive/negative. Let\u2019s first define these notions :

  • True Negative : No typo introduced, the model doesn\u2019t correct anything
  • False Positive : No typo introduced, but the model wrongly corrects the word
  • True Positive : A typo is introduced, the model corrects the word into the expected word
  • False Negative : A typo is introduced, but the model doesn\u2019t correct anything

With an example it\u2019s easier to visualize :

Word typed by the user Word after being corrected by the model Expected word True Negative love love love False Positive love loev love True Positive loev love love False Negative loev loev love

From these notions, we can compute the following metrics : accuracy, precision, recall, F-score, using the following formulas :

accuracy = (tp + tn) / (tp + tn + fp + fn)

precision = tp / (tp + fp)

recall = tp / (tp + fn)

f_score = 2 * (precision * recall) / (precision + recall)

Note

F-score is the harmonic mean of precision and recall. It\u2019s a way to gather both precision and recall in a single metric.

Important

Actually we use F\u03b2-score, which is a variant of the F-score where we can use a constant \u03b2 to weight the precision/recall ratio (see the wikipedia page about F-score).

This is useful because we value precision more.

We currently use \u03b2 = 0.9, which means precision has slightly more weight than recall.

"},{"location":"how_testing_is_done/#understanding-the-metrics","title":"Understanding the metrics","text":""},{"location":"how_testing_is_done/#swipe-resolution","title":"Swipe resolution","text":"

Accuracy - [0 - 1] - higher is better

Accuracy is straightforward : this is the ratio of correct predictions.

So an accuracy of 0.8 means the model correctly predicted the word being swiped 80% of the time.

"},{"location":"how_testing_is_done/#next-word-prediction-auto-completion","title":"Next-word prediction & auto-completion","text":"

Top-3 accuracy - [0 - 1] - higher is better

Same as accuracy, but 3 candidates are considered.

So a top-3 accuracy of 0.6 means that within the 3 candidates predicted by the model, the next word (or the word completion) is in these 3 candidates 60% of the time.

"},{"location":"how_testing_is_done/#auto-correction_1","title":"Auto-correction","text":"

Precision - [0 - 1] - higher is better

Precision is the ratio of typos among what is corrected by the model.

So a precision of 0.7 means that among all corrections made by the model, 70% were actually typos (and 30% were correct words that didn\u2019t need to be corrected).

A low precision means many words are corrected when they should not, and a high precision means only actual typos are corrected.

Recall - [0 - 1] - higher is better

Recall is the ratio of typos detected by the model.

So a recall of 0.65 means that the model correctly detected 65% of typos (and 35% of typos were not corrected by the model).

A low recall is a symptom that most typos are not detected, and a high recall means most typos are detected as typos.

F-score - [0 - 1] - higher is better

F-score is the harmonic mean of precision and recall, it\u2019s just a way to gather both precision and recall in a single metric.

Note that we weight precision slightly more than recall.

"},{"location":"internals/","title":"Internals","text":""},{"location":"internals/#cmdpy","title":"cmd.py","text":"

Module containing the implementation for the kebbie command line.

"},{"location":"internals/#kebbie.cmd.instantiate_correctors","title":"instantiate_correctors(keyboard, fast_mode=True, instantiate_emulator=True)","text":"

Create the right correctors (with the right platform, etc...) given the arguments from the command line.

Parameters:

Name Type Description Default keyboard str

Name fo the keyboard to load.

required fast_mode bool

If True, the corrector will be instantiated in fast mode (only AC).

True instantiate_emulator bool

If True, the emulators are instantiated (which trigger the layout detection). If False, only the corrector is instantiated, not the emulator.

True

Returns:

Type Description List[EmulatorCorrector]

The list of created Correctors.

Source code in kebbie/cmd.py
def instantiate_correctors(\n    keyboard: str, fast_mode: bool = True, instantiate_emulator: bool = True\n) -> List[EmulatorCorrector]:\n    \"\"\"Create the right correctors (with the right platform, etc...) given the\n    arguments from the command line.\n\n    Args:\n        keyboard (str): Name fo the keyboard to load.\n        fast_mode (bool, optional): If `True`, the corrector will be\n            instantiated in fast mode (only AC).\n        instantiate_emulator (bool, optional): If `True`, the emulators are\n            instantiated (which trigger the layout detection). If `False`, only\n            the corrector is instantiated, not the emulator.\n\n    Returns:\n        The list of created Correctors.\n    \"\"\"\n    if keyboard in [\"gboard\", \"tappa\", \"swiftkey\"]:\n        # Android keyboards\n        return [\n            EmulatorCorrector(\n                device=d,\n                platform=\"android\",\n                keyboard=keyboard,\n                fast_mode=fast_mode,\n                instantiate_emulator=instantiate_emulator,\n            )\n            for d in Emulator.get_android_devices()\n        ]\n    else:\n        # iOS keyboards\n        return [\n            EmulatorCorrector(\n                device=i,\n                platform=\"ios\",\n                keyboard=keyboard,\n                fast_mode=fast_mode,\n                instantiate_emulator=instantiate_emulator,\n                ios_name=ios_name,\n                ios_platform=ios_platform,\n            )\n            for i, (ios_platform, ios_name) in enumerate(Emulator.get_ios_devices())\n        ]\n
"},{"location":"internals/#kebbie.cmd.common_args","title":"common_args(parser)","text":"

Add common arguments to the given parser.

Parameters:

Name Type Description Default parser ArgumentParser

Parser where to add the arguments.

required Source code in kebbie/cmd.py
def common_args(parser: argparse.ArgumentParser):\n    \"\"\"Add common arguments to the given parser.\n\n    Args:\n        parser (argparse.ArgumentParser): Parser where to add the arguments.\n    \"\"\"\n    parser.add_argument(\n        \"--keyboard\",\n        \"-K\",\n        dest=\"keyboard\",\n        type=str,\n        required=True,\n        choices=[\"gboard\", \"ios\", \"kbkitpro\", \"kbkitoss\", \"tappa\", \"fleksy\", \"swiftkey\"],\n        help=\"Which keyboard, to be tested, is currently installed on the emulator.\",\n    )\n
"},{"location":"internals/#kebbie.cmd.cli","title":"cli()","text":"

Entry-point of the kebbie command line.

Source code in kebbie/cmd.py
def cli():\n    \"\"\"Entry-point of the `kebbie` command line.\"\"\"\n    # create the top-level parser\n    parser = argparse.ArgumentParser(description=\"Kebbie's command line.\")\n    subparsers = parser.add_subparsers(title=\"commands\", dest=\"cmd\")\n\n    evaluate_parser = subparsers.add_parser(\"evaluate\", help=\"Run the evaluation using emulated keyboard.\")\n    evaluate_parser.set_defaults(cmd=\"evaluate\")\n    common_args(evaluate_parser)\n    evaluate_parser.add_argument(\n        \"--result_file\",\n        \"-R\",\n        dest=\"result_file\",\n        type=str,\n        default=\"results.json\",\n        help=\"When to save the results of the evaluation\",\n    )\n    evaluate_parser.add_argument(\n        \"--all_tasks\",\n        \"-A\",\n        dest=\"all_tasks\",\n        action=\"store_true\",\n        default=False,\n        help=\"If specified, all tasks are evaluated (not only auto-correction, but also auto-completion and \"\n        \"next-word prediction).\",\n    )\n    evaluate_parser.add_argument(\n        \"--n_sentences\",\n        \"-N\",\n        dest=\"n_sentences\",\n        type=int,\n        default=100,\n        help=\"The number of sentences to use for the evaluation. Emulated keyboard are slow, so we can't run on the \"\n        \"full test set. 
Instead we pick the first N sentences.\",\n    )\n    evaluate_parser.add_argument(\n        \"--track_mistakes\",\n        \"-T\",\n        dest=\"track_mistakes\",\n        action=\"store_true\",\n        default=False,\n        help=\"If specified, mistakes will be tracked and saved in the result file.\",\n    )\n\n    layout_parser = subparsers.add_parser(\n        \"show_layout\", help=\"Display the layout over the keyboard for debugging purpose.\"\n    )\n    layout_parser.set_defaults(cmd=\"show_layout\")\n    common_args(layout_parser)\n\n    args = parser.parse_args()\n\n    if args.cmd is None:\n        parser.print_help(sys.stderr)\n        sys.exit(1)\n    elif args.cmd == \"evaluate\":\n        correctors = instantiate_correctors(args.keyboard, fast_mode=not args.all_tasks, instantiate_emulator=False)\n\n        # Get dataset, and filter it to keep only a small number of sentences\n        dataset = get_soda_dataset(args.n_sentences)\n\n        # Run the evaluation\n        results = evaluate(correctors, dataset=dataset, track_mistakes=args.track_mistakes)\n\n        # Save the results in a file\n        with open(args.result_file, \"w\", encoding=\"utf-8\") as f:\n            json.dump(results, f, ensure_ascii=False, indent=4)\n\n        print(\"Overall score : \", results[\"overall_score\"])\n\n    elif args.cmd == \"show_layout\":\n        correctors = instantiate_correctors(args.keyboard)\n        for c in correctors:\n            c.emulator.show_keyboards()\n            print(f\"Predictions : {c.emulator.get_predictions()}\")\n
"},{"location":"internals/#correctorspy","title":"correctors.py","text":"

Module containing the base Corrector class.

"},{"location":"internals/#kebbie.correctors.EmulatorCorrector","title":"EmulatorCorrector","text":"

Bases: Corrector

Corrector using an emulated keyboard.

Parameters:

Name Type Description Default platform str

Name of the platform used. android or ios.

required keyboard str

Name of the keyboard to test.

required device str

Device UDID to use for the emulator.

None fast_mode bool

If True, only auto-correction will be tested, and suggestions will not be retrieved. This is faster because we don't take a screenshot and run the OCR.

True instantiate_emulator bool

If False, the emulator is not initialized (It will only be initialized after being pickled). This is useful to quickly create instances of this class, without going through the whole layout detection (which takes time) 2 times : at initialization and after being pickled.

True Source code in kebbie/correctors.py
class EmulatorCorrector(Corrector):\n    \"\"\"Corrector using an emulated keyboard.\n\n    Args:\n        platform (str): Name of the platform used. `android` or `ios`.\n        keyboard (str): Name of the keyboard to test.\n        device (str): Device UDID to use for the emulator.\n        fast_mode (bool): If `True`, only auto-correction will be tested,\n            and suggestions will not be retrieved. This is faster because\n            we don't take screenshot and run the OCR.\n        instantiate_emulator (bool): If `False`, the emulator is not\n            initialized (It will only be initialized after being pickled).\n            This is useful to quickly create instances of this class,\n            without going through the whole layout detection (which takes\n            time) 2 times : at initialization and after being pickled.\n    \"\"\"\n\n    def __init__(\n        self,\n        platform: str,\n        keyboard: str,\n        device: str = None,\n        fast_mode: bool = True,\n        ios_name: str = None,\n        ios_platform: str = None,\n        instantiate_emulator: bool = True,\n    ):\n        super().__init__()\n\n        self.platform = platform\n        self.keyboard = keyboard\n        self.device = device\n        self.fast_mode = fast_mode\n        self.ios_name = ios_name\n        self.ios_platform = ios_platform\n\n        self.emulator = None\n        if instantiate_emulator:\n            self.emulator = Emulator(\n                self.platform,\n                self.keyboard,\n                device=self.device,\n                ios_name=self.ios_name,\n                ios_platform=self.ios_platform,\n            )\n\n        # Typing on keyboard is slow. 
Because we go through several AC calls\n        # in one sentence, keep track of the previously typed context, so we\n        # can just type the remaining characters\n        self.previous_context = \"\"\n\n    def __reduce__(self) -> Tuple:\n        \"\"\"This method simply makes the object pickable.\n\n        Returns:\n            Tuple of callable and arguments.\n        \"\"\"\n        return (\n            self.__class__,\n            (self.platform, self.keyboard, self.device, self.fast_mode, self.ios_name, self.ios_platform),\n        )\n\n    def cached_type(self, context: str, word: str):\n        \"\"\"This class keeps track of the content of the context currently\n        typed in the emulator. This method uses this current context to\n        determine if we need to retype the sentence or not. Instead of\n        always erasing the content being typed, we can directly type the\n        remaining characters, which saves up time.\n\n        Args:\n            context (str): Context to paste.\n            word (str): Word to type.\n        \"\"\"\n        sentence = context + word\n        if sentence.startswith(self.previous_context):\n            # The sentence to type start similarly as the previous context\n            # Don't retype everything, just what we need\n            self.emulator.type_characters(sentence[len(self.previous_context) :])\n        else:\n            # The previous context is not right, erase everything and type it\n            self.emulator.paste(context)\n            self.emulator.type_characters(word)\n        self.previous_context = sentence\n\n    def auto_correct(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        word: str,\n    ) -> List[str]:\n        \"\"\"Implementation of `auto_correct` method for emulated keyboards.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence 
basically).\n            keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            word (str): Word being typed (corresponding to the keystrokes).\n\n        Returns:\n            The list of correction candidates.\n        \"\"\"\n        self.cached_type(context, word)\n        candidates = self.emulator.get_predictions() if not self.fast_mode else []\n\n        candidates = [c for c in candidates if c != \"\"]\n\n        # On keyboard, the leftmost candidate is the word being typed without\n        # any change. If the word doesn't have a typo, this first candidate\n        # should be kept as the auto-correction, but if the word has a typo,\n        # we should remove it from the candidates list (as it will be\n        # auto-corrected).\n        # In order to know if it will be auto-corrected or not, we have no\n        # choice but type a space and retrieve the current text to see if it\n        # was auto-corrected or not.\n        self.emulator.type_characters(\" \")\n        self.previous_context = self.emulator.get_text()\n        autocorrection = self.previous_context[len(context) :].strip()\n\n        if len(candidates) == 0:\n            candidates = [autocorrection]\n        elif candidates[0] != autocorrection:\n            candidates.pop(0)\n            if autocorrection not in candidates:\n                candidates.insert(0, autocorrection)\n\n        return candidates\n\n    def auto_complete(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        partial_word: str,\n    ) -> List[str]:\n        \"\"\"Implementation of `auto_complete` method for emulated keyboards.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            keystrokes (List[Optional[Tuple[float, 
float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            partial_word (str): Partial word being typed (corresponding to the\n                keystrokes).\n\n        Returns:\n            The list of completion candidates.\n        \"\"\"\n        if self.fast_mode:\n            return []\n\n        self.cached_type(context, partial_word)\n        candidates = self.emulator.get_predictions()\n\n        candidates = [c for c in candidates if c != \"\"]\n\n        return candidates\n\n    def predict_next_word(self, context: str) -> List[str]:\n        \"\"\"Implementation of `predict_next_word` method for emulated keyboards.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n\n        Returns:\n            The list of next-word candidates.\n        \"\"\"\n        if self.fast_mode:\n            return []\n\n        # In order to get the predictions, the space should be typed\n        assert context[-1] == \" \"\n        self.cached_type(context[:-1], \" \")\n        candidates = self.emulator.get_predictions()\n        candidates = [c for c in candidates if c != \"\"]\n\n        return candidates\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.__reduce__","title":"__reduce__()","text":"

This method simply makes the object pickable.

Returns:

Type Description Tuple

Tuple of callable and arguments.

Source code in kebbie/correctors.py
def __reduce__(self) -> Tuple:\n    \"\"\"This method simply makes the object pickable.\n\n    Returns:\n        Tuple of callable and arguments.\n    \"\"\"\n    return (\n        self.__class__,\n        (self.platform, self.keyboard, self.device, self.fast_mode, self.ios_name, self.ios_platform),\n    )\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.cached_type","title":"cached_type(context, word)","text":"

This class keeps track of the content of the context currently typed in the emulator. This method uses this current context to determine if we need to retype the sentence or not. Instead of always erasing the content being typed, we can directly type the remaining characters, which saves time.

Parameters:

Name Type Description Default context str

Context to paste.

required word str

Word to type.

required Source code in kebbie/correctors.py
def cached_type(self, context: str, word: str):\n    \"\"\"This class keeps track of the content of the context currently\n    typed in the emulator. This method uses this current context to\n    determine if we need to retype the sentence or not. Instead of\n    always erasing the content being typed, we can directly type the\n    remaining characters, which saves up time.\n\n    Args:\n        context (str): Context to paste.\n        word (str): Word to type.\n    \"\"\"\n    sentence = context + word\n    if sentence.startswith(self.previous_context):\n        # The sentence to type start similarly as the previous context\n        # Don't retype everything, just what we need\n        self.emulator.type_characters(sentence[len(self.previous_context) :])\n    else:\n        # The previous context is not right, erase everything and type it\n        self.emulator.paste(context)\n        self.emulator.type_characters(word)\n    self.previous_context = sentence\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.auto_correct","title":"auto_correct(context, keystrokes, word)","text":"

Implementation of auto_correct method for emulated keyboards.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required word str

Word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of correction candidates.

Source code in kebbie/correctors.py
def auto_correct(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    word: str,\n) -> List[str]:\n    \"\"\"Implementation of `auto_correct` method for emulated keyboards.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        word (str): Word being typed (corresponding to the keystrokes).\n\n    Returns:\n        The list of correction candidates.\n    \"\"\"\n    self.cached_type(context, word)\n    candidates = self.emulator.get_predictions() if not self.fast_mode else []\n\n    candidates = [c for c in candidates if c != \"\"]\n\n    # On keyboard, the leftmost candidate is the word being typed without\n    # any change. If the word doesn't have a typo, this first candidate\n    # should be kept as the auto-correction, but if the word has a typo,\n    # we should remove it from the candidates list (as it will be\n    # auto-corrected).\n    # In order to know if it will be auto-corrected or not, we have no\n    # choice but type a space and retrieve the current text to see if it\n    # was auto-corrected or not.\n    self.emulator.type_characters(\" \")\n    self.previous_context = self.emulator.get_text()\n    autocorrection = self.previous_context[len(context) :].strip()\n\n    if len(candidates) == 0:\n        candidates = [autocorrection]\n    elif candidates[0] != autocorrection:\n        candidates.pop(0)\n        if autocorrection not in candidates:\n            candidates.insert(0, autocorrection)\n\n    return candidates\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.auto_complete","title":"auto_complete(context, keystrokes, partial_word)","text":"

Implementation of auto_complete method for emulated keyboards.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required partial_word str

Partial word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of completion candidates.

Source code in kebbie/correctors.py
def auto_complete(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    partial_word: str,\n) -> List[str]:\n    \"\"\"Implementation of `auto_complete` method for emulated keyboards.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        partial_word (str): Partial word being typed (corresponding to the\n            keystrokes).\n\n    Returns:\n        The list of completion candidates.\n    \"\"\"\n    if self.fast_mode:\n        return []\n\n    self.cached_type(context, partial_word)\n    candidates = self.emulator.get_predictions()\n\n    candidates = [c for c in candidates if c != \"\"]\n\n    return candidates\n
"},{"location":"internals/#kebbie.correctors.EmulatorCorrector.predict_next_word","title":"predict_next_word(context)","text":"

Implementation of predict_next_word method for emulated keyboards.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required

Returns:

Type Description List[str]

The list of next-word candidates.

Source code in kebbie/correctors.py
def predict_next_word(self, context: str) -> List[str]:\n    \"\"\"Implementation of `predict_next_word` method for emulated keyboards.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n\n    Returns:\n        The list of next-word candidates.\n    \"\"\"\n    if self.fast_mode:\n        return []\n\n    # In order to get the predictions, the space should be typed\n    assert context[-1] == \" \"\n    self.cached_type(context[:-1], \" \")\n    candidates = self.emulator.get_predictions()\n    candidates = [c for c in candidates if c != \"\"]\n\n    return candidates\n
"},{"location":"internals/#emulatorpy","title":"emulator.py","text":"

Module containing the code necessary to interact with the emulators, using Appium.

"},{"location":"internals/#kebbie.emulator.Emulator","title":"Emulator","text":"

Class used to interact with an emulator and type word on a given keyboard.

Parameters:

Name Type Description Default platform str

android or ios.

required keyboard str

The name of the keyboard installed on the emulator. This is needed because each keyboard has a different layout, and we need to know each key's position in order to type words.

required device str

Device UDID to use.

None host str

Appium server's address.

'127.0.0.1' port str

Appium server's port.

'4723'

Raises:

Type Description ValueError

Error raised if the given platform doesn't exist.

Source code in kebbie/emulator.py
class Emulator:\n    \"\"\"Class used to interact with an emulator and type word on a given keyboard.\n\n    Args:\n        platform (str): `android` or `ios`.\n        keyboard (str): The name of the keyboard installed on the emulator.\n            This is needed because each keyboard has a different layout, and we\n            need to know each key's position in order to type words.\n        device (str, optional): Device UDID to use.\n        host (str, optional): Appium server's address.\n        port (str, optional): Appium server's port.\n\n    Raises:\n        ValueError: Error raised if the given platform doesn't exist.\n    \"\"\"\n\n    def __init__(  # noqa: C901\n        self,\n        platform: str,\n        keyboard: str,\n        device: str = None,\n        host: str = \"127.0.0.1\",\n        port: str = \"4723\",\n        ios_name: str = None,\n        ios_platform: str = None,\n    ):\n        super().__init__()\n\n        self.platform = platform.lower()\n        if self.platform not in [ANDROID, IOS]:\n            raise ValueError(f\"Unknown platform : {self.platform}. 
Please specify `{ANDROID}` or `{IOS}`.\")\n\n        # Start appium\n        capabilities = ANDROID_CAPABILITIES if self.platform == ANDROID else IOS_CAPABILITIES\n        if self.platform == IOS:\n            capabilities[\"deviceName\"] = ios_name\n            capabilities[\"platformVersion\"] = ios_platform\n            capabilities[\"wdaLocalPort\"] = 8000 + (device if device is not None else 0)\n        if self.platform == ANDROID and device is not None:\n            capabilities[\"udid\"] = device\n        self.driver = webdriver.Remote(f\"{host}:{port}\", capabilities)\n        self.driver.implicitly_wait(20)\n\n        self.screen_size = self.driver.get_window_size()\n\n        self.keyboard = keyboard.lower()\n\n        # Access a typing field\n        self.typing_field = None\n        self._access_typing_field()\n\n        # Keep track of the keyboard behavior\n        # When the typing field is empty, the keyboard is uppercase by default\n        self.kb_is_upper = True\n        self.last_char_is_space = False\n        self.last_char_is_eos = False\n\n        # Set the keyboard as default\n        if self.platform == ANDROID:\n            self.select_keyboard(keyboard)\n\n        # Get the right layout\n        if self.keyboard == GBOARD:\n            self.detected = GboardLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == TAPPA:\n            self.detected = TappaLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == FLEKSY:\n            self.detected = FleksyLayoutDetector(self.driver)\n            self.layout = self.detected.layout\n        elif self.keyboard == IOS:\n            self.detected = IosLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == KBKITPRO:\n            self.detected = KbkitproLayoutDetector(self.driver, self._tap)\n            self.layout = 
self.detected.layout\n        elif self.keyboard == KBKITOSS:\n            self.detected = KbkitossLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        elif self.keyboard == SWIFTKEY:\n            self.detected = SwiftkeyLayoutDetector(self.driver, self._tap)\n            self.layout = self.detected.layout\n        else:\n            raise ValueError(\n                f\"Unknown keyboard : {self.keyboard}. Please specify `{GBOARD}`, `{TAPPA}`, `{FLEKSY}`, \"\n                f\"`{SWIFTKEY}`, `{KBKITPRO}`, `{KBKITOSS}` or `{IOS}`.\"\n            )\n\n        self.typing_field.clear()\n\n    def _access_typing_field(self):\n        \"\"\"Start the right application and access the typing field where we\n        will type our text.\n        \"\"\"\n        if self.platform == ANDROID:\n            subprocess.run(\n                [\"adb\", \"shell\", \"am\", \"start\", \"-a\", \"android.intent.action.VIEW\", \"-d\", BROWSER_PAD_URL],\n                stdout=subprocess.PIPE,\n            )\n            typing_field_loaded = False\n            while not typing_field_loaded:\n                typing_fields = self.driver.find_elements(By.CLASS_NAME, ANDROID_TYPING_FIELD_CLASS_NAME)\n                typing_field_loaded = len(typing_fields) == 2\n            self.typing_field = typing_fields[0]\n        else:\n            self.driver.find_element(By.CLASS_NAME, IOS_START_CHAT_CLASS_NAME).click()\n            self.typing_field = self.driver.find_element(By.ID, IOS_TYPING_FIELD_ID)\n        self.typing_field.click()\n        self.typing_field.clear()\n\n    def get_android_devices() -> List[str]:\n        \"\"\"Static method that uses the `adb devices` command to retrieve the\n        list of devices running.\n\n        Returns:\n            List of detected device UDID.\n        \"\"\"\n        result = subprocess.run([\"adb\", \"devices\"], stdout=subprocess.PIPE)\n        devices = result.stdout.decode().split(\"\\n\")\n        
devices = [d.split()[0] for d in devices if not (d.startswith(\"List of devices attached\") or len(d) == 0)]\n        return devices\n\n    def select_keyboard(self, keyboard):\n        \"\"\"Searches the IME of the desired keyboard and selects it, only for Android.\n\n        Args:\n            keyboard (str): Keyboard to search.\n        \"\"\"\n        if keyboard not in KEYBOARD_PACKAGE:\n            print(\n                f\"Warning ! {keyboard}'s IME isn't provided (in `KEYBOARD_PACKAGE`), can't automatically select the \"\n                \"keyboard.\"\n            )\n            return\n\n        ime_list = subprocess.check_output([\"adb\", \"shell\", \"ime\", \"list\", \"-s\"], universal_newlines=True)\n        ime_name = None\n        for ime in ime_list.strip().split(\"\\n\"):\n            if KEYBOARD_PACKAGE[keyboard] in ime:\n                ime_name = ime\n                break\n        if ime_name:\n            subprocess.run(\n                [\"adb\", \"shell\", \"settings\", \"put\", \"secure\", \"show_ime_with_hard_keyboard\", \"1\"],\n                stdout=subprocess.PIPE,\n            )\n            subprocess.run([\"adb\", \"shell\", \"ime\", \"enable\", ime_name], stdout=subprocess.PIPE)\n            subprocess.run([\"adb\", \"shell\", \"ime\", \"set\", ime_name], stdout=subprocess.PIPE)\n\n    def get_ios_devices() -> List[Tuple[str, str]]:\n        \"\"\"Static method that uses the `xcrun simctl` command to retrieve the\n        list of booted devices.\n\n        Returns:\n            List of booted device platform and device name.\n        \"\"\"\n        devices = []\n\n        result = subprocess.run([\"xcrun\", \"simctl\", \"list\", \"devices\"], stdout=subprocess.PIPE)\n        out = result.stdout.decode().split(\"\\n\")\n\n        curr_platform = \"\"\n        for line in out:\n            if line.startswith(\"== \") and line.endswith(\" ==\"):\n                continue\n            elif line.startswith(\"-- \") and line.endswith(\" 
--\"):\n                curr_platform = line[3:-3]\n            else:\n                m = re.match(r\"\\s+([^\\t]+)\\s+\\([A-Z0-9\\-]+\\)\\s+\\((Booted|Shutdown)\\)\", line)\n                if m:\n                    device_name = m.group(1)\n                    status = m.group(2)\n\n                    if status == \"Booted\" and curr_platform.startswith(\"iOS \"):\n                        devices.append((curr_platform[4:], device_name))\n\n        return devices\n\n    def _paste(self, text: str):\n        \"\"\"Paste the given text into the typing field, to quickly simulate\n        typing a context.\n\n        Args:\n            text (str): Text to paste.\n        \"\"\"\n        if text == \"\":\n            self.typing_field.clear()\n            self.kb_is_upper = True\n            self.last_char_is_space = False\n            self.last_char_is_eos = False\n        else:\n            # Note : on Android, pasting content in the field will erase the previous content\n            # (which is what we want). 
On iOS it will not, we need to do it \"manually\"\n            if self.platform == IOS:\n                self.typing_field.clear()\n            if self.keyboard == KBKITPRO or self.keyboard == KBKITOSS or self.keyboard == FLEKSY:\n                # In the case of KeyboardKit / Fleksy, after pasting the content, typing a space\n                # trigger a punctuation (because previous context may end with a space)\n                # To avoid this behavior, break the cycle by typing a backspace\n                self._tap(self.layout[\"lowercase\"][\"backspace\"])\n            self.typing_field.send_keys(text)\n            self.kb_is_upper = len(text) > 1 and self._is_eos(text[-2]) and text.endswith(\" \")\n            self.last_char_is_space = text.endswith(\" \")\n            self.last_char_is_eos = self._is_eos(text[-1])\n\n    def paste(self, text: str):\n        \"\"\"Paste the given text into the typing field, to quickly simulate\n        typing a context.\n\n        This method is just a wrapper around `_paste()`, making sure the typing\n        field is accessible. If for some reason it is not accessible, it tries\n        to access it and perform the action again.\n\n        Args:\n            text (str): Text to paste.\n        \"\"\"\n        try:\n            self._paste(text)\n        except StaleElementReferenceException:\n            self._access_typing_field()\n            self._paste(text)\n\n    def type_characters(self, characters: str):  # noqa: C901\n        \"\"\"Type the given sentence on the keyboard. 
For each character, it\n        finds the keys to press and send a tap on the keyboard.\n\n        Args:\n            characters (str): The sentence to type.\n        \"\"\"\n        for c in characters:\n            if c == \" \":\n                if self.last_char_is_space:\n                    # If the previous character was a space, don't retype a space\n                    # because it can be transformed into a `.`\n                    continue\n\n                if self.kb_is_upper:\n                    self._tap(self.layout[\"uppercase\"][\"spacebar\"])\n                else:\n                    self._tap(self.layout[\"lowercase\"][\"spacebar\"])\n\n                # Behavior of the keyboard : if the previous character typed was an EOS marker\n                # and a space is typed, the keyboard automatically switch to uppercase\n                if self.last_char_is_eos:\n                    self.kb_is_upper = True\n            elif c in self.layout[\"lowercase\"]:\n                # The character is a lowercase character\n                if self.kb_is_upper:\n                    # If the keyboard is in uppercase mode, change it to lowercase\n                    self._tap(self.layout[\"uppercase\"][\"shift\"])\n                    if self.keyboard == SWIFTKEY:\n                        # Swiftkey needs double tap, otherwise we are capslocking\n                        self._tap(self.layout[\"uppercase\"][\"shift\"])\n                self._tap(self.layout[\"lowercase\"][c])\n            elif c in self.layout[\"uppercase\"]:\n                # The character is an uppercase character\n                if not self.kb_is_upper:\n                    # Change the keyboard to uppercase\n                    self._tap(self.layout[\"lowercase\"][\"shift\"])\n                self._tap(self.layout[\"uppercase\"][c])\n                # After typing one character, the keyboard automatically come back to lowercase\n            elif c in self.layout[\"numbers\"]:\n             
   # The character is a number of a special character\n                # Access the number keyboard properly\n                if self.kb_is_upper:\n                    self._tap(self.layout[\"uppercase\"][\"numbers\"])\n                else:\n                    self._tap(self.layout[\"lowercase\"][\"numbers\"])\n                self._tap(self.layout[\"numbers\"][c])\n\n                if c != \"'\" or self.keyboard in [GBOARD, SWIFTKEY]:\n                    # For some reason, when `'` is typed, the keyboard automatically goes back\n                    # to lowercase, so no need to re-tap the button (unless the keyboard is GBoard / Swiftkey).\n                    # In all other cases, switch back to letters keyboard\n                    self._tap(self.layout[\"numbers\"][\"letters\"])\n            else:\n                # Can't type this character, ignore it\n                continue\n\n            # Behavior of the keyboard : if the previous character typed was an EOS marker\n            # and a space is typed, the keyboard automatically switch to uppercase\n            self.kb_is_upper = self.last_char_is_eos and c == \" \"\n\n            # Update infos about what we typed\n            self.last_char_is_eos = self._is_eos(c)\n            self.last_char_is_space = c == \" \"\n\n    def _is_eos(self, c: str) -> bool:\n        \"\"\"Check if the given character is an End-Of-Sentence marker. 
If an EOS\n        marker is typed followed by a space, the keyboard automatically switch\n        to uppercase letters (unless it's GBoard).\n\n        Args:\n            c (str): Character to check.\n\n        Returns:\n            True if the character is an EOS marker.\n        \"\"\"\n        if self.keyboard == GBOARD:\n            return False\n        else:\n            return c in [\".\", \"!\", \"?\"]\n\n    def _tap(self, frame: List[int], keyboard_frame: List[int] = None):\n        \"\"\"Tap on the screen at the position described by the given frame.\n\n        Args:\n            frame (List[int]): Frame describing the position where to tap. A\n                frame is : [start_pos_x, start_pos_y, width, height].\n            keyboard_frame (List[int]): If specified, the Keyboard frame to\n                use. If `None`, it will use `self.layout[\"keyboard_frame\"]`.\n        \"\"\"\n        x, y, w, h = frame\n        base_x, base_y, *_ = keyboard_frame if keyboard_frame else self.layout[\"keyboard_frame\"]\n\n        pos_x = base_x + x + int(w / 2)\n        pos_y = base_y + y + int(h / 2)\n\n        actions = ActionChains(self.driver)\n        actions.w3c_actions = ActionBuilder(self.driver, mouse=PointerInput(interaction.POINTER_TOUCH, \"touch\"))\n        actions.w3c_actions.pointer_action.move_to_location(pos_x, pos_y)\n        actions.w3c_actions.pointer_action.pointer_down()\n        actions.w3c_actions.pointer_action.pause(0.05)\n        actions.w3c_actions.pointer_action.release()\n        actions.perform()\n\n    def _take_screenshot(self):\n        \"\"\"Take a screenshot of the full screen.\n\n        Returns:\n            The image of the screen.\n        \"\"\"\n        screen_data = self.driver.get_screenshot_as_png()\n        screen = np.asarray(Image.open(io.BytesIO(screen_data)))\n        return screen.copy()\n\n    def get_predictions(self, lang: str = \"en\") -> List[str]:\n        \"\"\"Retrieve the predictions displayed by the 
keyboard.\n\n        Args:\n            lang (str): Language to use for the OCR.\n\n        Returns:\n            List of predictions from the keyboard.\n        \"\"\"\n        if hasattr(self, \"detected\"):\n            # Only keyboards that were auto-detected (using XML tree) have the\n            # attribute `detected`. If that's the case, it means we\n            # can retrieve the suggestions directly from the XML tree !\n            predictions = self.detected.get_suggestions()\n        else:\n            # Other keyboards still have to use (slow) OCR\n            time.sleep(PREDICTION_DELAY)\n            screen = self._take_screenshot()\n\n            kb_x, kb_y, kb_w, kb_h = self.layout[\"keyboard_frame\"]\n            screen = screen[kb_y : kb_y + kb_h, kb_x : kb_x + kb_w]\n\n            predictions = []\n            for x, y, w, h in self.layout[\"suggestions_frames\"]:\n                suggestion_area = screen[y : y + h, x : x + w]\n                ocr_results = pytesseract.image_to_string(suggestion_area, config=TESSERACT_CONFIG)\n                pred = ocr_results.strip().replace(\"\u201c\", \"\").replace('\"', \"\").replace(\"\\\\\", \"\")\n                predictions.append(pred)\n\n        return predictions\n\n    def _get_text(self) -> str:\n        \"\"\"Return the text currently contained in the typing field.\n\n        Returns:\n            Text of the typing field.\n        \"\"\"\n        return self.typing_field.text\n\n    def get_text(self) -> str:\n        \"\"\"Return the text currently contained in the typing field.\n\n        This method is just a wrapper around `_get_text()`, making sure the\n        typing field is accessible. 
If for some reason it is not accessible, it\n        tries to access it and perform the action again.\n\n        Returns:\n            Text of the typing field.\n        \"\"\"\n        try:\n            return self._get_text()\n        except StaleElementReferenceException:\n            self._access_typing_field()\n            return self._get_text()\n\n    def show_keyboards(self):\n        \"\"\"Take a screenshot and overlay the given layout, for debugging the\n        position of each keys.\n        \"\"\"\n        # Type a character, in order to have some suggestions\n        # Keyboard starts with uppercase letter by default (unless GBoard), and\n        # automatically go to lowercase after\n        if self.keyboard == GBOARD:\n            self._tap(self.layout[\"lowercase\"][\"a\"])\n        else:\n            self._tap(self.layout[\"uppercase\"][\"A\"])\n        screen_lower = self._take_screenshot()\n\n        self._tap(self.layout[\"lowercase\"][\"shift\"])\n        screen_upper = self._take_screenshot()\n\n        self._tap(self.layout[\"lowercase\"][\"numbers\"])\n        screen_numbers = self._take_screenshot()\n\n        for layout_name, screen in zip(\n            [\"lowercase\", \"uppercase\", \"numbers\"], [screen_lower, screen_upper, screen_numbers]\n        ):\n            self._set_area_box(screen, (0, 0), self.layout[\"keyboard_frame\"], \"keyboard frame\")\n            if \"suggestions_frames\" in self.layout:\n                for i, suggestion_frame in enumerate(self.layout[\"suggestions_frames\"]):\n                    self._set_area_box(screen, self.layout[\"keyboard_frame\"], suggestion_frame, f\"suggestion {i}\")\n            for key_name, key_frame in self.layout[layout_name].items():\n                self._set_area_box(screen, self.layout[\"keyboard_frame\"], key_frame, key_name)\n\n            cv2.imshow(layout_name, screen)\n\n        cv2.waitKey(0)\n        cv2.destroyAllWindows()\n\n    def _set_area_box(self, image, base_coords: 
Tuple[int], coords: Tuple[int], tag: str):\n        \"\"\"Add an area box on the given image (color is random).\n\n        Args:\n            image: Image where to add the box.\n            base_coords (Tuple[int]): Base coordinates from the full image.\n            coords (Tuple[int]): Coordinates of the element, as well as\n                dimensions.\n            tag (str): Tag for this box.\n        \"\"\"\n        base_x, base_y, *_ = base_coords\n        x, y, w, h = coords\n        x += base_x\n        y += base_y\n        # Generate color only until 200, to ensure it's dark enough\n        color = (random.randint(0, 200), random.randint(0, 200), random.randint(0, 200))\n        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)\n        cv2.putText(image, tag, (x, y + h + 17), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_android_devices","title":"get_android_devices()","text":"

Static method that uses the adb devices command to retrieve the list of devices running.

Returns:

Type Description List[str]

List of detected device UDID.

Source code in kebbie/emulator.py
def get_android_devices() -> List[str]:\n    \"\"\"Static method that uses the `adb devices` command to retrieve the\n    list of devices running.\n\n    Returns:\n        List of detected device UDID.\n    \"\"\"\n    result = subprocess.run([\"adb\", \"devices\"], stdout=subprocess.PIPE)\n    devices = result.stdout.decode().split(\"\\n\")\n    devices = [d.split()[0] for d in devices if not (d.startswith(\"List of devices attached\") or len(d) == 0)]\n    return devices\n
"},{"location":"internals/#kebbie.emulator.Emulator.select_keyboard","title":"select_keyboard(keyboard)","text":"

Searches the IME of the desired keyboard and selects it, only for Android.

Parameters:

Name Type Description Default keyboard str

Keyboard to search.

required Source code in kebbie/emulator.py
def select_keyboard(self, keyboard):\n    \"\"\"Searches the IME of the desired keyboard and selects it, only for Android.\n\n    Args:\n        keyboard (str): Keyboard to search.\n    \"\"\"\n    if keyboard not in KEYBOARD_PACKAGE:\n        print(\n            f\"Warning ! {keyboard}'s IME isn't provided (in `KEYBOARD_PACKAGE`), can't automatically select the \"\n            \"keyboard.\"\n        )\n        return\n\n    ime_list = subprocess.check_output([\"adb\", \"shell\", \"ime\", \"list\", \"-s\"], universal_newlines=True)\n    ime_name = None\n    for ime in ime_list.strip().split(\"\\n\"):\n        if KEYBOARD_PACKAGE[keyboard] in ime:\n            ime_name = ime\n            break\n    if ime_name:\n        subprocess.run(\n            [\"adb\", \"shell\", \"settings\", \"put\", \"secure\", \"show_ime_with_hard_keyboard\", \"1\"],\n            stdout=subprocess.PIPE,\n        )\n        subprocess.run([\"adb\", \"shell\", \"ime\", \"enable\", ime_name], stdout=subprocess.PIPE)\n        subprocess.run([\"adb\", \"shell\", \"ime\", \"set\", ime_name], stdout=subprocess.PIPE)\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_ios_devices","title":"get_ios_devices()","text":"

Static method that uses the xcrun simctl command to retrieve the list of booted devices.

Returns:

Type Description List[Tuple[str, str]]

List of booted device platform and device name.

Source code in kebbie/emulator.py
def get_ios_devices() -> List[Tuple[str, str]]:\n    \"\"\"Static method that uses the `xcrun simctl` command to retrieve the\n    list of booted devices.\n\n    Returns:\n        List of booted device platform and device name.\n    \"\"\"\n    devices = []\n\n    result = subprocess.run([\"xcrun\", \"simctl\", \"list\", \"devices\"], stdout=subprocess.PIPE)\n    out = result.stdout.decode().split(\"\\n\")\n\n    curr_platform = \"\"\n    for line in out:\n        if line.startswith(\"== \") and line.endswith(\" ==\"):\n            continue\n        elif line.startswith(\"-- \") and line.endswith(\" --\"):\n            curr_platform = line[3:-3]\n        else:\n            m = re.match(r\"\\s+([^\\t]+)\\s+\\([A-Z0-9\\-]+\\)\\s+\\((Booted|Shutdown)\\)\", line)\n            if m:\n                device_name = m.group(1)\n                status = m.group(2)\n\n                if status == \"Booted\" and curr_platform.startswith(\"iOS \"):\n                    devices.append((curr_platform[4:], device_name))\n\n    return devices\n
"},{"location":"internals/#kebbie.emulator.Emulator.paste","title":"paste(text)","text":"

Paste the given text into the typing field, to quickly simulate typing a context.

This method is just a wrapper around _paste(), making sure the typing field is accessible. If for some reason it is not accessible, it tries to access it and perform the action again.

Parameters:

Name Type Description Default text str

Text to paste.

required Source code in kebbie/emulator.py
def paste(self, text: str):\n    \"\"\"Paste the given text into the typing field, to quickly simulate\n    typing a context.\n\n    This method is just a wrapper around `_paste()`, making sure the typing\n    field is accessible. If for some reason it is not accessible, it tries\n    to access it and perform the action again.\n\n    Args:\n        text (str): Text to paste.\n    \"\"\"\n    try:\n        self._paste(text)\n    except StaleElementReferenceException:\n        self._access_typing_field()\n        self._paste(text)\n
"},{"location":"internals/#kebbie.emulator.Emulator.type_characters","title":"type_characters(characters)","text":"

Type the given sentence on the keyboard. For each character, it finds the keys to press and sends a tap on the keyboard.

Parameters:

Name Type Description Default characters str

The sentence to type.

required Source code in kebbie/emulator.py
def type_characters(self, characters: str):  # noqa: C901\n    \"\"\"Type the given sentence on the keyboard. For each character, it\n    finds the keys to press and send a tap on the keyboard.\n\n    Args:\n        characters (str): The sentence to type.\n    \"\"\"\n    for c in characters:\n        if c == \" \":\n            if self.last_char_is_space:\n                # If the previous character was a space, don't retype a space\n                # because it can be transformed into a `.`\n                continue\n\n            if self.kb_is_upper:\n                self._tap(self.layout[\"uppercase\"][\"spacebar\"])\n            else:\n                self._tap(self.layout[\"lowercase\"][\"spacebar\"])\n\n            # Behavior of the keyboard : if the previous character typed was an EOS marker\n            # and a space is typed, the keyboard automatically switch to uppercase\n            if self.last_char_is_eos:\n                self.kb_is_upper = True\n        elif c in self.layout[\"lowercase\"]:\n            # The character is a lowercase character\n            if self.kb_is_upper:\n                # If the keyboard is in uppercase mode, change it to lowercase\n                self._tap(self.layout[\"uppercase\"][\"shift\"])\n                if self.keyboard == SWIFTKEY:\n                    # Swiftkey needs double tap, otherwise we are capslocking\n                    self._tap(self.layout[\"uppercase\"][\"shift\"])\n            self._tap(self.layout[\"lowercase\"][c])\n        elif c in self.layout[\"uppercase\"]:\n            # The character is an uppercase character\n            if not self.kb_is_upper:\n                # Change the keyboard to uppercase\n                self._tap(self.layout[\"lowercase\"][\"shift\"])\n            self._tap(self.layout[\"uppercase\"][c])\n            # After typing one character, the keyboard automatically come back to lowercase\n        elif c in self.layout[\"numbers\"]:\n            # The character is a number of 
a special character\n            # Access the number keyboard properly\n            if self.kb_is_upper:\n                self._tap(self.layout[\"uppercase\"][\"numbers\"])\n            else:\n                self._tap(self.layout[\"lowercase\"][\"numbers\"])\n            self._tap(self.layout[\"numbers\"][c])\n\n            if c != \"'\" or self.keyboard in [GBOARD, SWIFTKEY]:\n                # For some reason, when `'` is typed, the keyboard automatically goes back\n                # to lowercase, so no need to re-tap the button (unless the keyboard is GBoard / Swiftkey).\n                # In all other cases, switch back to letters keyboard\n                self._tap(self.layout[\"numbers\"][\"letters\"])\n        else:\n            # Can't type this character, ignore it\n            continue\n\n        # Behavior of the keyboard : if the previous character typed was an EOS marker\n        # and a space is typed, the keyboard automatically switch to uppercase\n        self.kb_is_upper = self.last_char_is_eos and c == \" \"\n\n        # Update infos about what we typed\n        self.last_char_is_eos = self._is_eos(c)\n        self.last_char_is_space = c == \" \"\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_predictions","title":"get_predictions(lang='en')","text":"

Retrieve the predictions displayed by the keyboard.

Parameters:

Name Type Description Default lang str

Language to use for the OCR.

'en'

Returns:

Type Description List[str]

List of predictions from the keyboard.

Source code in kebbie/emulator.py
def get_predictions(self, lang: str = \"en\") -> List[str]:\n    \"\"\"Retrieve the predictions displayed by the keyboard.\n\n    Args:\n        lang (str): Language to use for the OCR.\n\n    Returns:\n        List of predictions from the keyboard.\n    \"\"\"\n    if hasattr(self, \"detected\"):\n        # Only keyboards that were auto-detected (using XML tree) have the\n        # attribute `detected`. If that's the case, it means we\n        # can retrieve the suggestions directly from the XML tree !\n        predictions = self.detected.get_suggestions()\n    else:\n        # Other keyboards still have to use (slow) OCR\n        time.sleep(PREDICTION_DELAY)\n        screen = self._take_screenshot()\n\n        kb_x, kb_y, kb_w, kb_h = self.layout[\"keyboard_frame\"]\n        screen = screen[kb_y : kb_y + kb_h, kb_x : kb_x + kb_w]\n\n        predictions = []\n        for x, y, w, h in self.layout[\"suggestions_frames\"]:\n            suggestion_area = screen[y : y + h, x : x + w]\n            ocr_results = pytesseract.image_to_string(suggestion_area, config=TESSERACT_CONFIG)\n            pred = ocr_results.strip().replace(\"\u201c\", \"\").replace('\"', \"\").replace(\"\\\\\", \"\")\n            predictions.append(pred)\n\n    return predictions\n
"},{"location":"internals/#kebbie.emulator.Emulator.get_text","title":"get_text()","text":"

Return the text currently contained in the typing field.

This method is just a wrapper around _get_text(), making sure the typing field is accessible. If for some reason it is not accessible, it tries to access it and perform the action again.

Returns:

Type Description str

Text of the typing field.

Source code in kebbie/emulator.py
def get_text(self) -> str:\n    \"\"\"Return the text currently contained in the typing field.\n\n    This method is just a wrapper around `_get_text()`, making sure the\n    typing field is accessible. If for some reason it is not accessible, it\n    tries to access it and perform the action again.\n\n    Returns:\n        Text of the typing field.\n    \"\"\"\n    try:\n        return self._get_text()\n    except StaleElementReferenceException:\n        self._access_typing_field()\n        return self._get_text()\n
"},{"location":"internals/#kebbie.emulator.Emulator.show_keyboards","title":"show_keyboards()","text":"

Take a screenshot and overlay the given layout, for debugging the position of each key.

Source code in kebbie/emulator.py
def show_keyboards(self):\n    \"\"\"Take a screenshot and overlay the given layout, for debugging the\n    position of each keys.\n    \"\"\"\n    # Type a character, in order to have some suggestions\n    # Keyboard starts with uppercase letter by default (unless GBoard), and\n    # automatically go to lowercase after\n    if self.keyboard == GBOARD:\n        self._tap(self.layout[\"lowercase\"][\"a\"])\n    else:\n        self._tap(self.layout[\"uppercase\"][\"A\"])\n    screen_lower = self._take_screenshot()\n\n    self._tap(self.layout[\"lowercase\"][\"shift\"])\n    screen_upper = self._take_screenshot()\n\n    self._tap(self.layout[\"lowercase\"][\"numbers\"])\n    screen_numbers = self._take_screenshot()\n\n    for layout_name, screen in zip(\n        [\"lowercase\", \"uppercase\", \"numbers\"], [screen_lower, screen_upper, screen_numbers]\n    ):\n        self._set_area_box(screen, (0, 0), self.layout[\"keyboard_frame\"], \"keyboard frame\")\n        if \"suggestions_frames\" in self.layout:\n            for i, suggestion_frame in enumerate(self.layout[\"suggestions_frames\"]):\n                self._set_area_box(screen, self.layout[\"keyboard_frame\"], suggestion_frame, f\"suggestion {i}\")\n        for key_name, key_frame in self.layout[layout_name].items():\n            self._set_area_box(screen, self.layout[\"keyboard_frame\"], key_frame, key_name)\n\n        cv2.imshow(layout_name, screen)\n\n    cv2.waitKey(0)\n    cv2.destroyAllWindows()\n
"},{"location":"internals/#kebbie.emulator.LayoutDetector","title":"LayoutDetector","text":"

Base class for auto-detection of the keyboard layout.

To auto-detect a new keyboard, create a new sub-class, and overwrite __init__() and get_suggestions(). Use the existing subclass for GBoard as reference.

Parameters:

Name Type Description Default driver Remote

The Appium driver, used to access elements on the emulator.

required tap_fn Callable

A callback used to tap at specific position on the screen. See Emulator._tap().

required xpath_root str

XPath to the root element of the keyboard.

required xpath_keys str

XPath to detect the keys elements.

required Source code in kebbie/emulator.py
class LayoutDetector:\n    \"\"\"Base class for auto-detection of the keyboard layout.\n\n    To auto-detect a new keyboard, create a new sub-class, and overwite\n    `__init__()` and `get_suggestions()`. Use the existing subclass for GBoard\n    as reference.\n\n    Args:\n        driver (webdriver.Remote): The Appium driver, used to access elements\n            on the emulator.\n        tap_fn (Callable): A callback used to tap at specific position on the\n            screen. See `Emulator._tap()`.\n        xpath_root (str): XPath to the root element of the keyboard.\n        xpath_keys (str): XPath to detect the keys elements.\n    \"\"\"\n\n    def __init__(\n        self, driver: webdriver.Remote, tap_fn: Callable, xpath_root: str, xpath_keys: str, android: bool = True\n    ):\n        self.driver = driver\n        self.tap = tap_fn\n        self.xpath_root = xpath_root\n        self.xpath_keys = xpath_keys\n        self.android = android\n\n        layout = {}\n\n        # Get the root element of our keyboard\n        root = self.driver.find_element(By.XPATH, self.xpath_root)\n\n        # On empty field, the keyboard is on uppercase\n        # So first, retrieve the keyboard frame and uppercase characters\n        kb_frame, screen_layout = self._detect_keys(root, current_layout=\"uppercase\")\n        layout[\"keyboard_frame\"] = kb_frame\n        layout[\"uppercase\"] = screen_layout\n\n        # Then, after typing a letter, the keyboard goes to lowercase automatically\n        self.tap(layout[\"uppercase\"][\"A\"], layout[\"keyboard_frame\"])\n        _, screen_layout = self._detect_keys(root, keyboard_frame=layout[\"keyboard_frame\"], current_layout=\"lowercase\")\n        layout[\"lowercase\"] = screen_layout\n\n        # Finally, access the symbols keyboard and get characters positions\n        self.tap(layout[\"lowercase\"][\"numbers\"], layout[\"keyboard_frame\"])\n        _, screen_layout = self._detect_keys(root, 
keyboard_frame=layout[\"keyboard_frame\"], current_layout=\"numbers\")\n        layout[\"numbers\"] = screen_layout\n\n        # Reset out keyboard to the original layer\n        self.tap(layout[\"numbers\"][\"letters\"], layout[\"keyboard_frame\"])\n\n        self.layout = layout\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Note that it's slower to access the XML through methods like\n        `find_element()`, and it's faster to access the raw XML with\n        `self.driver.page_source` and parse it as text directly.\n\n        Raises:\n            NotImplementedError: Exception raised if this method is not\n                overwritten.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        raise NotImplementedError\n\n    def _detect_keys(\n        self, root: WebElement, current_layout: str, keyboard_frame: List[int] = None\n    ) -> Tuple[List[int], Dict]:\n        \"\"\"This method detects all keys currently on screen.\n\n        If no keyboard_frame is given, it will also detects the keyboard frame.\n\n        Args:\n            root (WebElement): Root element in the XML tree that represents the\n                keyboard (with all its keys).\n            current_layout (str): Name of the current layout.\n            keyboard_frame (List[int], optional): Optionally, the keyboard\n                frame (so we don't need to re-detect it everytime).\n\n        Returns:\n            Keyboard frame\n            Layout with all the keys detected on this screen.\n        \"\"\"\n        layout = {}\n        if keyboard_frame is None:\n            if self.android:\n                # Detect the keyboard frame\n                kb = root.find_element(By.ID, \"android:id/inputArea\")\n                keyboard_frame = self._get_frame(kb)\n            else:\n                keyboard_frame = self._get_frame(root)\n\n        for key_elem in 
root.find_elements(By.XPATH, self.xpath_keys):\n            label = self._get_label(key_elem, current_layout=current_layout)\n            if label is not None:\n                layout[label] = self._get_frame(key_elem)\n\n        # Then update the letters positions to be relative to the keyboard frame\n        for k in layout:\n            layout[k][0] -= keyboard_frame[0]\n            layout[k][1] -= keyboard_frame[1]\n\n        return keyboard_frame, layout\n\n    def _get_frame(self, element: WebElement) -> List[int]:\n        \"\"\"For layout detection, this method returns the bounds of the given\n        element.\n\n        Args:\n            element (WebElement): XML Element describing a key.\n\n        Returns:\n            Bounds of this key.\n        \"\"\"\n        if self.android:\n            m = re.match(r\"\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]\", element.get_attribute(\"bounds\"))\n            if m:\n                bounds = [int(g) for g in m.groups()]\n                return [bounds[0], bounds[1], bounds[2] - bounds[0], bounds[3] - bounds[1]]\n        else:\n            r = json.loads(element.get_attribute(\"rect\"))\n            return [r[\"x\"], r[\"y\"], r[\"width\"], r[\"height\"]]\n\n    def _get_label(self, element: WebElement, current_layout: str, is_suggestion: bool = False) -> str:\n        \"\"\"For layout detection, this method returns the content of the given\n        element.\n\n        This method returns `None` if it's a key we don't care about. 
This\n        method takes care of translating the content (the name used in the XML\n        tree is not the same as the one used in our layout).\n\n        Args:\n            element (WebElement): XML Element describing a key.\n            current_layout (str): Name of the current layout.\n            is_suggestion (bool, optional): If we are retrieving the content of\n                a suggestion, the content shouldn't be translated.\n\n        Returns:\n            Content of the key, or None if it's a key we should ignore.\n        \"\"\"\n        content = element.get_attribute(\"content-desc\") if self.android else element.get_attribute(\"name\")\n\n        if is_suggestion:\n            # If we are getting the content of the suggestion, return the content directly\n            return content\n\n        if content in CONTENT_TO_IGNORE:\n            return None\n        elif not self.android and content == \"more\":\n            if current_layout == \"uppercase\" or current_layout == \"lowercase\":\n                return \"numbers\"\n            else:\n                return \"letters\"\n        elif content in CONTENT_TO_RENAME:\n            return CONTENT_TO_RENAME[content]\n        else:\n            return content\n
"},{"location":"internals/#kebbie.emulator.LayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Note that it's slower to access the XML through methods like find_element(), and it's faster to access the raw XML with self.driver.page_source and parse it as text directly.

Raises:

Type Description NotImplementedError

Exception raised if this method is not overwritten.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Note that it's slower to access the XML through methods like\n    `find_element()`, and it's faster to access the raw XML with\n    `self.driver.page_source` and parse it as text directly.\n\n    Raises:\n        NotImplementedError: Exception raised if this method is not\n            overwritten.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"internals/#kebbie.emulator.GboardLayoutDetector","title":"GboardLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Gboard keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class GboardLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Gboard keyboard. See `LayoutDetector` for more\n    information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=f\"./*/*[@package='{KEYBOARD_PACKAGE[GBOARD]}']\",\n            xpath_keys=\".//*[@resource-id][@content-desc]\",\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        sections = [\n            data\n            for data in self.driver.page_source.split(\"<android.widget.FrameLayout\")\n            if \"com.google.android.inputmethod\" in data\n        ]\n        for section in sections:\n            if \"content-desc\" in section and \"resource-id\" not in section and 'long-clickable=\"true\"' in section:\n                m = re.search(r\"content\\-desc=\\\"([^\\\"]*)\\\"\", section)\n                if m:\n                    content = m.group(1)\n\n                    # Deal with emojis\n                    emoji = re.match(r\"emoji (&[^;]+;)\", content)\n                    suggestions.append(html.unescape(emoji[1]) if emoji else content)\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.GboardLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    sections = [\n        data\n        for data in self.driver.page_source.split(\"<android.widget.FrameLayout\")\n        if \"com.google.android.inputmethod\" in data\n    ]\n    for section in sections:\n        if \"content-desc\" in section and \"resource-id\" not in section and 'long-clickable=\"true\"' in section:\n            m = re.search(r\"content\\-desc=\\\"([^\\\"]*)\\\"\", section)\n            if m:\n                content = m.group(1)\n\n                # Deal with emojis\n                emoji = re.match(r\"emoji (&[^;]+;)\", content)\n                suggestions.append(html.unescape(emoji[1]) if emoji else content)\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.IosLayoutDetector","title":"IosLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the iOS default keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class IosLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the iOS default keyboard. See `LayoutDetector` for\n    more information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=\".//XCUIElementTypeKeyboard\",\n            xpath_keys=\"(.//XCUIElementTypeKey|.//XCUIElementTypeButton)\",\n            android=False,\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        sections = [\n            data for data in self.driver.page_source.split(\"<XCUIElementTypeOther\") if \"name=\" in data.split(\">\")[0]\n        ]\n        is_typing_predictions_section = False\n        for section in sections:\n            m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", section)\n            if m:\n                name = m.group(1)\n\n                if name == \"Typing Predictions\":\n                    is_typing_predictions_section = True\n                    continue\n\n                if is_typing_predictions_section:\n                    suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.IosLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    sections = [\n        data for data in self.driver.page_source.split(\"<XCUIElementTypeOther\") if \"name=\" in data.split(\">\")[0]\n    ]\n    is_typing_predictions_section = False\n    for section in sections:\n        m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", section)\n        if m:\n            name = m.group(1)\n\n            if name == \"Typing Predictions\":\n                is_typing_predictions_section = True\n                continue\n\n            if is_typing_predictions_section:\n                suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitproLayoutDetector","title":"KbkitproLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the KeyboardKit Pro demo keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class KbkitproLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the KeyboardKit Pro demo keyboard. See\n    `LayoutDetector` for more information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=\".//XCUIElementTypeOther[XCUIElementTypeButton and XCUIElementTypeTextField]\",\n            xpath_keys=\".//XCUIElementTypeButton\",\n            android=False,\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n            if \"<XCUIElementTypeTextField\" in data:\n                pred_part = data.split(\"<XCUIElementTypeTextField\")[0]\n                if \"<XCUIElementTypeButton\" in pred_part and 'name=\"Add\"' in pred_part:\n                    for elem in pred_part.split(\">\")[2:]:\n                        if \"<XCUIElementTypeTextField\" in elem:\n                            break\n                        m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", elem)\n                        if m:\n                            name = m.group(1)\n                            suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitproLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n        if \"<XCUIElementTypeTextField\" in data:\n            pred_part = data.split(\"<XCUIElementTypeTextField\")[0]\n            if \"<XCUIElementTypeButton\" in pred_part and 'name=\"Add\"' in pred_part:\n                for elem in pred_part.split(\">\")[2:]:\n                    if \"<XCUIElementTypeTextField\" in elem:\n                        break\n                    m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", elem)\n                    if m:\n                        name = m.group(1)\n                        suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitossLayoutDetector","title":"KbkitossLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the KeyboardKit OSS demo keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class KbkitossLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the KeyboardKit OSS demo keyboard. See\n    `LayoutDetector` for more information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=\".//XCUIElementTypeOther[XCUIElementTypeButton and XCUIElementTypeStaticText]\",\n            xpath_keys=\".//XCUIElementTypeButton\",\n            android=False,\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n            if \", Subtitle\" in data:\n                pred_part = data.split(\", Subtitle\")[0]\n                for elem in pred_part.split(\">\")[1:]:\n                    m = re.search(r\"name=\\\"([^\\\"]*)\\\"?\", elem)\n                    if m:\n                        name = m.group(1)\n                        suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.KbkitossLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    for data in self.driver.page_source.split(\"<XCUIElementTypeOther\"):\n        if \", Subtitle\" in data:\n            pred_part = data.split(\", Subtitle\")[0]\n            for elem in pred_part.split(\">\")[1:]:\n                m = re.search(r\"name=\\\"([^\\\"]*)\\\"?\", elem)\n                if m:\n                    name = m.group(1)\n                    suggestions.append(name.replace(\"\u201c\", \"\").replace(\"\u201d\", \"\"))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.SwiftkeyLayoutDetector","title":"SwiftkeyLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Swiftkey keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class SwiftkeyLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Swiftkey keyboard. See `LayoutDetector` for more\n    information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=f\"./*/*[@package='{KEYBOARD_PACKAGE[SWIFTKEY]}']\",\n            xpath_keys=\".//*[@class='android.view.View'][@content-desc]\",\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        # Get the raw content as text, weed out useless elements\n        for data in self.driver.page_source.split(\"<android.widget.FrameLayout\"):\n            if \"com.touchtype.swiftkey\" in data and \"<android.view.View \" in data:\n                sections = data.split(\"<android.view.View \")\n                for section in sections[1:]:\n                    m = re.search(r\"content-desc=\\\"([^\\\"]*)\\\"\", section)\n                    if m:\n                        suggestions.append(html.unescape(m.group(1)))\n                break\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.SwiftkeyLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    # Get the raw content as text, weed out useless elements\n    for data in self.driver.page_source.split(\"<android.widget.FrameLayout\"):\n        if \"com.touchtype.swiftkey\" in data and \"<android.view.View \" in data:\n            sections = data.split(\"<android.view.View \")\n            for section in sections[1:]:\n                m = re.search(r\"content-desc=\\\"([^\\\"]*)\\\"\", section)\n                if m:\n                    suggestions.append(html.unescape(m.group(1)))\n            break\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.TappaLayoutDetector","title":"TappaLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Tappa keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py
class TappaLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Tappa keyboard. See `LayoutDetector` for more\n    information.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(\n            *args,\n            xpath_root=f\"./*/*[@package='{KEYBOARD_PACKAGE[TAPPA]}']\",\n            xpath_keys=\".//com.mocha.keyboard.inputmethod.keyboard.Key\",\n            **kwargs,\n        )\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        # Get the raw content as text, weed out useless elements\n        section = self.driver.page_source.split(f\"{KEYBOARD_PACKAGE[TAPPA]}:id/toolbar\")[1].split(\n            \"</android.widget.FrameLayout>\"\n        )[0]\n\n        for line in section.split(\"\\n\"):\n            if \"<android.widget.TextView\" in line:\n                m = re.search(r\"text=\\\"([^\\\"]*)\\\"\", line)\n                if m:\n                    suggestions.append(html.unescape(m.group(1)))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.TappaLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    # Get the raw content as text, weed out useless elements\n    section = self.driver.page_source.split(f\"{KEYBOARD_PACKAGE[TAPPA]}:id/toolbar\")[1].split(\n        \"</android.widget.FrameLayout>\"\n    )[0]\n\n    for line in section.split(\"\\n\"):\n        if \"<android.widget.TextView\" in line:\n            m = re.search(r\"text=\\\"([^\\\"]*)\\\"\", line)\n            if m:\n                suggestions.append(html.unescape(m.group(1)))\n\n    return suggestions\n
"},{"location":"internals/#kebbie.emulator.FleksyLayoutDetector","title":"FleksyLayoutDetector","text":"

Bases: LayoutDetector

Layout detector for the Fleksy keyboard. See LayoutDetector for more information.

Note that this class is only semi-automatically detected: the layout itself is not detected, but the suggestions are retrieved from the XML tree (no need to rely on OCR, much faster). The layout is hard-coded for now.

Source code in kebbie/emulator.py
class FleksyLayoutDetector(LayoutDetector):\n    \"\"\"Layout detector for the Fleksy keyboard. See `LayoutDetector` for more\n    information.\n\n    Note that this class is only semi-automatically detected : the layout\n    itself is not detected, but the suggestions are retrieved from the XML tree\n    (no need to rely on OCR, much faster). The layout is hard-coded for now.\n    \"\"\"\n\n    def __init__(self, driver: webdriver.Remote):\n        self.driver = driver\n\n        # Adapt the layout to the screen\n        w = FLEKSY_LAYOUT[\"keyboard_frame\"][2]\n        h = FLEKSY_LAYOUT[\"keyboard_frame\"][3]\n        self.layout = {\"keyboard_frame\": FLEKSY_LAYOUT[\"keyboard_frame\"]}\n        for layout_name in [\"lowercase\", \"uppercase\", \"numbers\"]:\n            for key_name, key_frame in FLEKSY_LAYOUT[layout_name].items():\n                if layout_name not in self.layout:\n                    self.layout[layout_name] = {}\n                self.layout[layout_name][key_name] = [\n                    int(key_frame[0] * w),\n                    int(key_frame[1] * h),\n                    int(key_frame[2] * w),\n                    int(key_frame[3] * h),\n                ]\n\n    def get_suggestions(self) -> List[str]:\n        \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n        Returns:\n            List of suggestions from the keyboard.\n        \"\"\"\n        suggestions = []\n\n        # Get the raw content as text, weed out useless elements\n        sections = [\n            s\n            for s in self.driver.page_source.split(\"XCUIElementTypeOther\")\n            if \"XCUIElementTypeStaticText\" in s and \"XCUIElementTypeButton\" not in s\n        ]\n\n        for s in sections:\n            m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", s)\n            if m:\n                suggestions.append(html.unescape(m.group(1)))\n\n        return suggestions\n
"},{"location":"internals/#kebbie.emulator.FleksyLayoutDetector.get_suggestions","title":"get_suggestions()","text":"

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type Description List[str]

List of suggestions from the keyboard.

Source code in kebbie/emulator.py
def get_suggestions(self) -> List[str]:\n    \"\"\"Method to retrieve the keyboard suggestions from the XML tree.\n\n    Returns:\n        List of suggestions from the keyboard.\n    \"\"\"\n    suggestions = []\n\n    # Get the raw content as text, weed out useless elements\n    sections = [\n        s\n        for s in self.driver.page_source.split(\"XCUIElementTypeOther\")\n        if \"XCUIElementTypeStaticText\" in s and \"XCUIElementTypeButton\" not in s\n    ]\n\n    for s in sections:\n        m = re.search(r\"name=\\\"([^\\\"]*)\\\"\", s)\n        if m:\n            suggestions.append(html.unescape(m.group(1)))\n\n    return suggestions\n
"},{"location":"internals/#gesturepy","title":"gesture.py","text":"

Module containing the function make_swipe_gesture, which is used to create a natural-looking swipe gesture from a list of letter-points.

"},{"location":"internals/#kebbie.gesture.make_swipe_gesture","title":"make_swipe_gesture(control_points)","text":"

Function to generate an artificial swipe gesture from a list of points. The given points represent the typed letters on the keyboard. This function simply generates several other points between the control points. Points are generated using sequential Bezier curves. The resulting swipe gesture passes by the control points.

Parameters:

Name Type Description Default control_points List[Tuple[float, float]]

Control points, representing the letter typed. The resulting swipe gesture will pass by these points.

required

Returns:

Type Description List[Tuple[float, float]]

Points generated by the swipe gesture.

Source code in kebbie/gesture.py
def make_swipe_gesture(control_points: List[Tuple[float, float]]) -> List[Tuple[float, float]]:\n    \"\"\"Function to generate artificial swipe gesture from a list of points.\n    The given points represents the typed letters on the keyboard. This\n    function simply generate several other points between the control points.\n    Points are generated using sequential Bezier curves. The resulting swipe\n    gesture pass by the control points.\n\n    Args:\n        control_points (List[Tuple[float, float]]): Control points,\n            representing the letter typed. The resulting swipe gesture will\n            pass by these points.\n\n    Returns:\n        Points generated by the swipe gesture.\n    \"\"\"\n    gesture_points = [control_points[0]]\n\n    # Pick a \"style\" (speed & acceleration) and keep it constant across the gesture\n    speed = random.uniform(MIN_N_POINTS_PER_DIST, MAX_N_POINTS_PER_DIST)\n    acceleration = random.uniform(MIN_ACCELERATION, MAX_ACCELERATION)\n\n    # Generate bezier curves between each control points\n    for p1, p2 in zip(control_points[:-1], control_points[1:]):\n        # The distance between the 2 points will dictate the speed and radius\n        d = euclidian_dist(p1, p2)\n        radius = min(d, MAX_RADIUS)\n        n_points = max(1, int(d * speed))\n\n        linspace = accelerated_linspace(n_points, acceleration)\n\n        # We don't want the curves to be straight between the control points,\n        # so we generate random points to add curves\n        p1_curv = random_point_around(p1, radius=radius)\n        p2_curv = random_point_around(p2, radius=radius)\n\n        # Make the bezier curve with the specified number of points\n        xs, ys = bezier_curve([p2, p2_curv, p1_curv, p1], linspace=linspace)\n        bezier_points = list(zip(xs, ys))\n\n        # Make sure the control point p2 is here\n        if bezier_points[-1] != p2:\n            bezier_points.append(p2)\n        # p1 was already added in the previous 
loop, no need to add it\n        if bezier_points[0] == p1:\n            bezier_points = bezier_points[1:]\n\n        gesture_points.extend(bezier_points)\n\n    return gesture_points\n
"},{"location":"internals/#kebbie.gesture.random_point_around","title":"random_point_around(p, radius)","text":"

Generate a random point around the given point p, within the given radius.

Parameters:

Name Type Description Default p Tuple[float, float]

Coordinates to use as a starting point.

required radius float

Radius within the starting point to generate the random point.

required

Returns:

Type Description Tuple[float, float]

Coordinates of the generated random point.

Source code in kebbie/gesture.py
def random_point_around(p: Tuple[float, float], radius: float) -> Tuple[float, float]:\n    \"\"\"Generate a random point around the given point p, within the given\n    radius.\n\n    Args:\n        p (Tuple[float, float]): Coordinates to use as a starting point.\n        radius (float): Radius within the starting point to generate the random\n            point.\n\n    Returns:\n        Coordinates of the generated random point.\n    \"\"\"\n    rand_x = random.uniform(p[0] - radius, p[0] + radius)\n    rand_y = random.uniform(p[1] - radius, p[1] + radius)\n    return (rand_x, rand_y)\n
"},{"location":"internals/#kebbie.gesture.bernstein_poly","title":"bernstein_poly(i, n, t)","text":"

The Bernstein polynomial of n, i as a function of t.

Taken from : https://stackoverflow.com/a/12644499/9494790

Parameters:

Name Type Description Default i int

i

required n int

n

required t float

t

required

Returns:

Type Description float

The computed value for this polynomial function.

Source code in kebbie/gesture.py
def bernstein_poly(i: int, n: int, t: float) -> float:\n    \"\"\"The Bernstein polynomial of n, i as a function of t.\n\n    Taken from : https://stackoverflow.com/a/12644499/9494790\n\n    Args:\n        i (int): i\n        n (int): n\n        t (float): t\n\n    Returns:\n        The computed value for this polynomial function.\n    \"\"\"\n    return comb(n, i) * (t ** (n - i)) * (1 - t) ** i\n
"},{"location":"internals/#kebbie.gesture.bezier_curve","title":"bezier_curve(control_points, linspace)","text":"

Given a set of control points, return the bezier curve defined by the control points.

See : http://processingjs.nihongoresources.com/bezierinfo/

Taken from : https://stackoverflow.com/a/12644499/9494790

Parameters:

Name Type Description Default control_points List[Tuple[float, float]]

Control points used to generate the bezier curve.

required linspace List[float]

Linspace to use for sampling points across the Bezier curve.

required

Returns:

Type Description Tuple[List[float], List[float]]

Sampled points along the bezier curve.

Source code in kebbie/gesture.py
def bezier_curve(control_points: List[Tuple[float, float]], linspace: List[float]) -> Tuple[List[float], List[float]]:\n    \"\"\"Given a set of control points, return the bezier curve defined by the\n    control points.\n\n    See : http://processingjs.nihongoresources.com/bezierinfo/\n\n    Taken from : https://stackoverflow.com/a/12644499/9494790\n\n    Args:\n        control_points (List[Tuple[float, float]]): Control points used to\n            generate the bezier curve.\n        linspace (List[float]): Linspace to use for sampling points across the\n            Bezier curve.\n\n    Returns:\n        Sampled points along the bezier curve.\n    \"\"\"\n    n_points = len(control_points)\n    x_points = np.array([p[0] for p in control_points])\n    y_points = np.array([p[1] for p in control_points])\n\n    polynomial_array = np.array([bernstein_poly(i, n_points - 1, linspace) for i in range(0, n_points)])\n\n    x_vals = np.dot(x_points, polynomial_array)\n    y_vals = np.dot(y_points, polynomial_array)\n\n    return x_vals, y_vals\n
"},{"location":"internals/#kebbie.gesture.accelerated_linspace","title":"accelerated_linspace(n, acceleration)","text":"

Alternative to np.linspace; instead of giving a range of numbers evenly distributed, this one is not evenly distributed, and simulates an acceleration at first, and then a deceleration.

Parameters:

Name Type Description Default n int

Number of points to generate in the linspace.

required acceleration float

A number that dictates how constant the acceleration is. The lower the value, the more S-shaped the curve is.

required

Returns:

Type Description List[float]

Generated points.

Source code in kebbie/gesture.py
def accelerated_linspace(n: int, acceleration: float) -> List[float]:\n    \"\"\"Alternative to np.linspace, instead of giving a range of number evenly\n    distributed, this one is not evenly distributed, and simulate an\n    acceleration at first, and then a deceleration.\n\n    Args:\n        n (int): Number of points to generate in the linspace.\n        acceleration (float): A number that dictate how constant the\n            acceleration is. The lower, the more S-shape is used.\n\n    Returns:\n        Generated points.\n    \"\"\"\n\n    def norm(x):\n        nom = x - x.min()\n        denom = x.max() - x.min()\n        return nom / denom\n\n    def sigmoid(x, k):\n        return 1 / (1 + np.exp(-x / k))\n\n    linspace = np.linspace(-1.0, 1.0, n)\n\n    if n <= 1:\n        return linspace\n    else:\n        return norm(sigmoid(linspace, k=acceleration))\n
"},{"location":"internals/#layoutpy","title":"layout.py","text":"

Module containing the helpers LayoutHelper, useful class to deal with the layout of a keyboard, access key positions, etc...

"},{"location":"internals/#kebbie.layout.KeyInfo","title":"KeyInfo dataclass","text":"

Structure containing all information needed for a given character (key).

Parameters:

Name Type Description Default klayer_id int

Keyboard Layer ID where this key is located.

required width float

Width of the key.

required height float

Height of the key.

required center Tuple[float, float]

Center position (x, y coordinates) of the key.

required Source code in kebbie/layout.py
@dataclass\nclass KeyInfo:\n    \"\"\"Structure containing all information needed for a given character (key).\n\n    Args:\n        klayer_id (int): Keyboard Layer ID where this key is located.\n        width (float): Width of the key.\n        height (float): Height of the key.\n        center (Tuple[float, float]): Center position (x, y coordinates) of the\n            key.\n    \"\"\"\n\n    klayer_id: int\n    width: float\n    height: float\n    center: Tuple[float, float]\n
"},{"location":"internals/#kebbie.layout.Key","title":"Key dataclass","text":"

Structure containing information needed for each key of a given keyboard layer.

Parameters:

Name Type Description Default char str

Character associated with this key.

required bounds Dict[str, float]

Dictionary representing the bounding box of the key. The dictionary should contain the following keys : right, left, top, bottom.

required Source code in kebbie/layout.py
@dataclass\nclass Key:\n    \"\"\"Structure containing information needed for each key of a given keyboard\n    layer.\n\n    Args:\n        char (str): Character associated with this key.\n        bounds (Dict[str, float]): Dictionary representing the bounding box of\n            the key. The dictionary should contains the following keys :\n            `right`, `left`, `top`, `bottom`.\n    \"\"\"\n\n    char: str\n    bounds: Dict[str, float]\n
"},{"location":"internals/#kebbie.layout.LayoutHelper","title":"LayoutHelper","text":"

Small class that represents a keyboard layout. The goal of this class is to offer some easy-to-use methods to deal with a keyboard layout.

Parameters:

Name Type Description Default lang str

Language of the layout to load.

'en-US' custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

None ignore_layers_after Optional[int])

Ignore higher layers of the keyboard layout. If None is given, no layer is ignored.

None Source code in kebbie/layout.py
class LayoutHelper:\n    \"\"\"Small class that represents a Keyboard layout. The goal of this class is\n    to offer some easy-to-use method to deal with a keyboard layout.\n\n    Args:\n        lang (str, optional): Language of the layout to load.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        ignore_layers_after (Optional[int]) : Ignore higher layers of the\n            keyboard layout. If `None` is given, no layer is ignored.\n    \"\"\"\n\n    def __init__(self, lang: str = \"en-US\", custom_keyboard: Dict = None, ignore_layers_after: Optional[int] = None):\n        keyboard = custom_keyboard if custom_keyboard is not None else load_keyboard(lang)\n        self.keys_info, self.klayers_info, self.accents = self._extract_infos(keyboard[\"layout\"], ignore_layers_after)\n        self.letter_accents = [c for c in self.accents if re.match(r\"^[\\pL]+$\", c)]\n        self.spelling_symbols = keyboard[\"settings\"][\"allowed_symbols_in_words\"]\n        self.layout_name = keyboard[\"keyboard\"][\"default-layout\"]\n\n    def _extract_infos(  # noqa: C901\n        self, keyboard_layout: Dict, ignore_layers_after: Optional[int] = None\n    ) -> Tuple[Dict[str, KeyInfo], Dict[int, Key], List[str]]:\n        \"\"\"This method reads the given keyboard layout, and extract useful data\n        structures from this (to be used later by other methods). This\n        basically builds the LayoutHelper class (and should be used only inside\n        the constructor).\n\n        Note:\n            The given keyboard layout contains 24 layers. Each key appears in\n            one (or several) layer of the keyboard. 
Accents are associated to\n            the same key as their non-accented version.\n            This class may be used to generate typing noise, so accents should\n            have their own keys (and closer accents should be represented by\n            closer keys). This method takes care of it, by generating \"virtual\n            keyboard layers\", for each group of accents. The goal is to\n            generate a virtual keyboard layer that is as close as possible as\n            the actual keyboard, used by real-users.\n\n        Args:\n            keyboard_layout (Dict): Dictionary representing the keyboard and\n                its layout.\n            ignore_layers_after (Optional[int]) : Ignore higher layers of the\n                keyboard layout. If `None` is given, no layer is ignored.\n\n        Returns:\n            Key information for each character in the keyboard.\n            Key information for each layer of the keyboard.\n            List of accents used in the keyboard.\n        \"\"\"\n        keys_info = {}  # Dict char -> key infos (bounds, center, klayer ID)\n        klayers_info = defaultdict(list)  # Dict klayer ID -> list of keys (bounds, char)\n        all_accents = set()\n\n        # A keyboard layout is made of several \"layers\", each identified by a KeyboardID\n        last_klayer_id = len(keyboard_layout)\n        for klayer in keyboard_layout:\n            if klayer[\"buttons\"] is None or (ignore_layers_after is not None and klayer[\"id\"] > ignore_layers_after):\n                continue\n\n            # Each layer is a list of button\n            for button in klayer[\"buttons\"]:\n                # Button always have a character, and optionally accents\n                char, accents = button[\"labels\"][0], button[\"labels\"][1:]\n\n                # Special characters : space, shift, numbers, magic, etc...\n                if button[\"type\"] != 1:\n                    if char.lower() == SPACE:\n                        char = 
\" \"\n                    elif char == POINT:\n                        # Points should be added to our key infos\n                        pass\n                    else:\n                        # Other special characters are ignored\n                        char = None\n\n                if char is None:\n                    continue\n\n                # Save the character and its key information\n                # Save it only if it's not already in a previous klayer\n                if char not in keys_info or keys_info[char].klayer_id > klayer[\"id\"]:\n                    keys_info[char] = KeyInfo(\n                        klayer[\"id\"],\n                        button[\"boundingRect\"][\"right\"] - button[\"boundingRect\"][\"left\"],\n                        button[\"boundingRect\"][\"bottom\"] - button[\"boundingRect\"][\"top\"],\n                        (button[\"centerPoint\"][\"x\"], button[\"centerPoint\"][\"y\"]),\n                    )\n                # But always save its info in the klayers info\n                klayers_info[klayer[\"id\"]].append(Key(char, button[\"boundingRect\"]))\n\n                # Then, save the accents if any\n                for i, char_accent in enumerate(accents):\n                    all_accents.add(char_accent)\n\n                    # Create a virtual position for the accent\n                    bounds, center = self._make_virtual_key(i, button[\"boundingRect\"])\n\n                    # Save the accent (only if not existing) in a new virtual klayer\n                    if char_accent not in keys_info:\n                        keys_info[char_accent] = KeyInfo(\n                            last_klayer_id,\n                            bounds[\"right\"] - bounds[\"left\"],\n                            bounds[\"bottom\"] - bounds[\"top\"],\n                            (center[\"x\"], center[\"y\"]),\n                        )\n                    # But always saveits info in the klayers info\n                    
klayers_info[last_klayer_id].append(Key(char_accent, bounds))\n\n                # If we added some accent in a virtual klayer, don't forget to update the last klayer ID\n                if accents:\n                    last_klayer_id += 1\n\n        return keys_info, klayers_info, sorted(all_accents)\n\n    def _make_virtual_key(\n        self, idx: int, initial_bounds: Dict[str, float]\n    ) -> Tuple[Dict[str, float], Dict[str, float]]:\n        \"\"\"Method to create a new boundary for an accented character. Based on\n        the given id, the generated boundary box will be generated at a\n        different position.\n\n        This method tries to follow a similar pattern as the sample app, with\n        accents appearing in lines of 4 accents.\n\n        Args:\n            idx (int): The index of the bounding box to generate.\n            initial_bounds (Dict[str, float]): The bounding box of the\n                non-accented key.\n\n        Returns:\n            Generated bounding box.\n            Its associated center position.\n        \"\"\"\n        width = initial_bounds[\"right\"] - initial_bounds[\"left\"]\n        height = initial_bounds[\"bottom\"] - initial_bounds[\"top\"]\n\n        start_x = initial_bounds[\"left\"] + (idx % N_ACCENT_PER_LINE) * width\n        start_y = initial_bounds[\"bottom\"] - (idx // N_ACCENT_PER_LINE) * height\n\n        bounds = {\n            \"bottom\": start_y,\n            \"left\": start_x,\n            \"right\": start_x + width,\n            \"top\": start_y - height,\n        }\n        center = {\n            \"x\": bounds[\"left\"] + width / 2,\n            \"y\": bounds[\"top\"] + height / 2,\n        }\n        return bounds, center\n\n    def get_key_info(self, char: str) -> Tuple[float, float, float, float, int]:\n        \"\"\"Method to retrieve the information associated to a specific key.\n\n        Args:\n            char (str): Character for which to retrieve key information.\n\n        Raises:\n       
     KeyError: Exception raised if the given character can't be typed (\n                because it doesn't exist on this keyboard layout).\n\n        Returns:\n            Width of the key for the requested character.\n            Height of the key for the requested character.\n            Center position (x-axis) of the key for the requested character.\n            Center position (y-axis) of the key for the requested character.\n            Keyboard layer ID where the character's key is located.\n        \"\"\"\n        k = self.keys_info[char]\n        return k.width, k.height, k.center[0], k.center[1], k.klayer_id\n\n    def get_key(self, pos: Tuple[float, float], klayer_id: int) -> str:\n        \"\"\"Get the character associated with the given position.\n\n        Args:\n            pos (Tuple[float, float]): Position (x, y) in the keyboard.\n            klayer_id (int): Keyboard layer ID to use.\n\n        Returns:\n            Character associated to the given position.\n        \"\"\"\n        klayer = self.klayers_info[klayer_id]\n\n        try:\n            # Retrieve the key that contains the sampled position\n            key = next(\n                k\n                for k in klayer\n                if k.bounds[\"left\"] <= pos[0] <= k.bounds[\"right\"] and k.bounds[\"top\"] <= pos[1] <= k.bounds[\"bottom\"]\n            )\n        except StopIteration:\n            # Maybe the sampled position was out of bound -> retrieve the closest key\n            key = min(\n                klayer,\n                key=lambda k: euclidian_dist(\n                    pos,\n                    (\n                        k.bounds[\"left\"] + (k.bounds[\"right\"] - k.bounds[\"left\"]) / 2,\n                        k.bounds[\"top\"] + (k.bounds[\"bottom\"] - k.bounds[\"top\"]) / 2,\n                    ),\n                ),\n            )\n\n        return key.char\n
"},{"location":"internals/#kebbie.layout.LayoutHelper._extract_infos","title":"_extract_infos(keyboard_layout, ignore_layers_after=None)","text":"

This method reads the given keyboard layout, and extracts useful data structures from it (to be used later by other methods). This basically builds the LayoutHelper class (and should be used only inside the constructor).

Note

The given keyboard layout contains 24 layers. Each key appears in one (or several) layers of the keyboard. Accents are associated with the same key as their non-accented version. This class may be used to generate typing noise, so accents should have their own keys (and closer accents should be represented by closer keys). This method takes care of it by generating \"virtual keyboard layers\" for each group of accents. The goal is to generate a virtual keyboard layer that is as close as possible to the actual keyboard used by real users.

Parameters:

Name Type Description Default keyboard_layout Dict

Dictionary representing the keyboard and its layout.

required ignore_layers_after Optional[int])

Ignore higher layers of the keyboard layout. If None is given, no layer is ignored.

None

Returns:

Type Description Dict[str, KeyInfo]

Key information for each character in the keyboard.

Dict[int, Key]

Key information for each layer of the keyboard.

List[str]

List of accents used in the keyboard.

Source code in kebbie/layout.py
def _extract_infos(  # noqa: C901\n    self, keyboard_layout: Dict, ignore_layers_after: Optional[int] = None\n) -> Tuple[Dict[str, KeyInfo], Dict[int, Key], List[str]]:\n    \"\"\"This method reads the given keyboard layout, and extract useful data\n    structures from this (to be used later by other methods). This\n    basically builds the LayoutHelper class (and should be used only inside\n    the constructor).\n\n    Note:\n        The given keyboard layout contains 24 layers. Each key appears in\n        one (or several) layer of the keyboard. Accents are associated to\n        the same key as their non-accented version.\n        This class may be used to generate typing noise, so accents should\n        have their own keys (and closer accents should be represented by\n        closer keys). This method takes care of it, by generating \"virtual\n        keyboard layers\", for each group of accents. The goal is to\n        generate a virtual keyboard layer that is as close as possible as\n        the actual keyboard, used by real-users.\n\n    Args:\n        keyboard_layout (Dict): Dictionary representing the keyboard and\n            its layout.\n        ignore_layers_after (Optional[int]) : Ignore higher layers of the\n            keyboard layout. 
If `None` is given, no layer is ignored.\n\n    Returns:\n        Key information for each character in the keyboard.\n        Key information for each layer of the keyboard.\n        List of accents used in the keyboard.\n    \"\"\"\n    keys_info = {}  # Dict char -> key infos (bounds, center, klayer ID)\n    klayers_info = defaultdict(list)  # Dict klayer ID -> list of keys (bounds, char)\n    all_accents = set()\n\n    # A keyboard layout is made of several \"layers\", each identified by a KeyboardID\n    last_klayer_id = len(keyboard_layout)\n    for klayer in keyboard_layout:\n        if klayer[\"buttons\"] is None or (ignore_layers_after is not None and klayer[\"id\"] > ignore_layers_after):\n            continue\n\n        # Each layer is a list of button\n        for button in klayer[\"buttons\"]:\n            # Button always have a character, and optionally accents\n            char, accents = button[\"labels\"][0], button[\"labels\"][1:]\n\n            # Special characters : space, shift, numbers, magic, etc...\n            if button[\"type\"] != 1:\n                if char.lower() == SPACE:\n                    char = \" \"\n                elif char == POINT:\n                    # Points should be added to our key infos\n                    pass\n                else:\n                    # Other special characters are ignored\n                    char = None\n\n            if char is None:\n                continue\n\n            # Save the character and its key information\n            # Save it only if it's not already in a previous klayer\n            if char not in keys_info or keys_info[char].klayer_id > klayer[\"id\"]:\n                keys_info[char] = KeyInfo(\n                    klayer[\"id\"],\n                    button[\"boundingRect\"][\"right\"] - button[\"boundingRect\"][\"left\"],\n                    button[\"boundingRect\"][\"bottom\"] - button[\"boundingRect\"][\"top\"],\n                    (button[\"centerPoint\"][\"x\"], 
button[\"centerPoint\"][\"y\"]),\n                )\n            # But always save its info in the klayers info\n            klayers_info[klayer[\"id\"]].append(Key(char, button[\"boundingRect\"]))\n\n            # Then, save the accents if any\n            for i, char_accent in enumerate(accents):\n                all_accents.add(char_accent)\n\n                # Create a virtual position for the accent\n                bounds, center = self._make_virtual_key(i, button[\"boundingRect\"])\n\n                # Save the accent (only if not existing) in a new virtual klayer\n                if char_accent not in keys_info:\n                    keys_info[char_accent] = KeyInfo(\n                        last_klayer_id,\n                        bounds[\"right\"] - bounds[\"left\"],\n                        bounds[\"bottom\"] - bounds[\"top\"],\n                        (center[\"x\"], center[\"y\"]),\n                    )\n                # But always saveits info in the klayers info\n                klayers_info[last_klayer_id].append(Key(char_accent, bounds))\n\n            # If we added some accent in a virtual klayer, don't forget to update the last klayer ID\n            if accents:\n                last_klayer_id += 1\n\n    return keys_info, klayers_info, sorted(all_accents)\n
"},{"location":"internals/#kebbie.layout.LayoutHelper._make_virtual_key","title":"_make_virtual_key(idx, initial_bounds)","text":"

Method to create a new boundary for an accented character. Based on the given index, the bounding box will be generated at a different position.

This method tries to follow a similar pattern as the sample app, with accents appearing in lines of 4 accents.

Parameters:

Name Type Description Default idx int

The index of the bounding box to generate.

required initial_bounds Dict[str, float]

The bounding box of the non-accented key.

required

Returns:

Type Description Dict[str, float]

Generated bounding box.

Dict[str, float]

Its associated center position.

Source code in kebbie/layout.py
def _make_virtual_key(\n    self, idx: int, initial_bounds: Dict[str, float]\n) -> Tuple[Dict[str, float], Dict[str, float]]:\n    \"\"\"Method to create a new boundary for an accented character. Based on\n    the given id, the generated boundary box will be generated at a\n    different position.\n\n    This method tries to follow a similar pattern as the sample app, with\n    accents appearing in lines of 4 accents.\n\n    Args:\n        idx (int): The index of the bounding box to generate.\n        initial_bounds (Dict[str, float]): The bounding box of the\n            non-accented key.\n\n    Returns:\n        Generated bounding box.\n        Its associated center position.\n    \"\"\"\n    width = initial_bounds[\"right\"] - initial_bounds[\"left\"]\n    height = initial_bounds[\"bottom\"] - initial_bounds[\"top\"]\n\n    start_x = initial_bounds[\"left\"] + (idx % N_ACCENT_PER_LINE) * width\n    start_y = initial_bounds[\"bottom\"] - (idx // N_ACCENT_PER_LINE) * height\n\n    bounds = {\n        \"bottom\": start_y,\n        \"left\": start_x,\n        \"right\": start_x + width,\n        \"top\": start_y - height,\n    }\n    center = {\n        \"x\": bounds[\"left\"] + width / 2,\n        \"y\": bounds[\"top\"] + height / 2,\n    }\n    return bounds, center\n
"},{"location":"internals/#kebbie.layout.LayoutHelper.get_key_info","title":"get_key_info(char)","text":"

Method to retrieve the information associated to a specific key.

Parameters:

Name Type Description Default char str

Character for which to retrieve key information.

required

Raises:

Type Description KeyError

Exception raised if the given character can't be typed (because it doesn't exist on this keyboard layout).

Returns:

Type Description float

Width of the key for the requested character.

float

Height of the key for the requested character.

float

Center position (x-axis) of the key for the requested character.

float

Center position (y-axis) of the key for the requested character.

int

Keyboard layer ID where the character's key is located.

Source code in kebbie/layout.py
def get_key_info(self, char: str) -> Tuple[float, float, float, float, int]:\n    \"\"\"Method to retrieve the information associated to a specific key.\n\n    Args:\n        char (str): Character for which to retrieve key information.\n\n    Raises:\n        KeyError: Exception raised if the given character can't be typed (\n            because it doesn't exist on this keyboard layout).\n\n    Returns:\n        Width of the key for the requested character.\n        Height of the key for the requested character.\n        Center position (x-axis) of the key for the requested character.\n        Center position (y-axis) of the key for the requested character.\n        Keyboard layer ID where the character's key is located.\n    \"\"\"\n    k = self.keys_info[char]\n    return k.width, k.height, k.center[0], k.center[1], k.klayer_id\n
"},{"location":"internals/#kebbie.layout.LayoutHelper.get_key","title":"get_key(pos, klayer_id)","text":"

Get the character associated with the given position.

Parameters:

Name Type Description Default pos Tuple[float, float]

Position (x, y) in the keyboard.

required klayer_id int

Keyboard layer ID to use.

required

Returns:

Type Description str

Character associated to the given position.

Source code in kebbie/layout.py
def get_key(self, pos: Tuple[float, float], klayer_id: int) -> str:\n    \"\"\"Get the character associated with the given position.\n\n    Args:\n        pos (Tuple[float, float]): Position (x, y) in the keyboard.\n        klayer_id (int): Keyboard layer ID to use.\n\n    Returns:\n        Character associated to the given position.\n    \"\"\"\n    klayer = self.klayers_info[klayer_id]\n\n    try:\n        # Retrieve the key that contains the sampled position\n        key = next(\n            k\n            for k in klayer\n            if k.bounds[\"left\"] <= pos[0] <= k.bounds[\"right\"] and k.bounds[\"top\"] <= pos[1] <= k.bounds[\"bottom\"]\n        )\n    except StopIteration:\n        # Maybe the sampled position was out of bound -> retrieve the closest key\n        key = min(\n            klayer,\n            key=lambda k: euclidian_dist(\n                pos,\n                (\n                    k.bounds[\"left\"] + (k.bounds[\"right\"] - k.bounds[\"left\"]) / 2,\n                    k.bounds[\"top\"] + (k.bounds[\"bottom\"] - k.bounds[\"top\"]) / 2,\n                ),\n            ),\n        )\n\n    return key.char\n
"},{"location":"internals/#noise_modelpy","title":"noise_model.py","text":"

Module defining the NoiseModel class, which takes care of introducing typos in a clean text (and later seeing if the model can properly correct these typos).

"},{"location":"internals/#kebbie.noise_model.Typo","title":"Typo","text":"

Bases: Enum

Enum listing all possible typos that can be introduced.

Source code in kebbie/noise_model.py
class Typo(Enum):\n    \"\"\"Enum listing all possible typos that can be introduced.\"\"\"\n\n    # Deletions\n    DELETE_SPELLING_SYMBOL = \"DELETE_SPELLING_SYMBOL\"\n    DELETE_SPACE = \"DELETE_SPACE\"\n    DELETE_PUNCTUATION = \"DELETE_PUNCTUATION\"\n    DELETE_CHAR = \"DELETE_CHAR\"\n\n    # Additions\n    ADD_SPELLING_SYMBOL = \"ADD_SPELLING_SYMBOL\"\n    ADD_SPACE = \"ADD_SPACE\"\n    ADD_PUNCTUATION = \"ADD_PUNCTUATION\"\n    ADD_CHAR = \"ADD_CHAR\"\n\n    # Substitutions\n    SUBSTITUTE_CHAR = \"SUBSTITUTE_CHAR\"\n\n    # Simplifications\n    SIMPLIFY_ACCENT = \"SIMPLIFY_ACCENT\"\n    SIMPLIFY_CASE = \"SIMPLIFY_CASE\"\n\n    # Transposition\n    TRANSPOSE_CHAR = \"TRANSPOSE_CHAR\"\n\n    # Common typos\n    COMMON_TYPO = \"COMMON_TYPO\"\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel","title":"NoiseModel","text":"

Class responsible for introducing typos in a clean text.

Most typos are introduced directly in the text. Then fuzzy typing is applied, using two Gaussian distributions (one for the x-axis and one for the y-axis), mimicking a user typing on a soft keyboard.

The ratio arguments are here to choose how wide the Gaussian distribution is. A wider distribution will be less precise, a narrower distribution will be more precise. To test how wide a ratio is, run the following code:

from scipy.stats import norm\n\ndef compute(x):\n    cdf = norm.cdf(x)\n    return cdf - (1 - cdf)\n\nprint(compute(2.32))    # >>> 0.9796591226625606\n
So in this case, a ratio of 2.32 gives a precision of ~98% (a typo will be introduced in 2% of the cases).

Parameters:

Name Type Description Default lang str

Language used.

required custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

None common_typos Optional[Dict[str, List[str]]]

Dictionary of common typos. If None, common typos are not used.

None typo_probs Optional[Dict[str, float]]

Probabilities for each type of typos. If None is given, DEFAULT_TYPO_PROBS is used.

None x_offset float

Parameter for the Gaussian distribution for the fuzzy typing. Base position offset on the x-axis.

0 y_offset float

Parameter for the Gaussian distribution for the fuzzy typing. Base position offset on the y-axis.

0 x_ratio float

Parameter for the Gaussian distribution for the fuzzy typing. It controls how wide the distribution is on the x-axis, which is the precision of the typing.

DEFAULT_SIGMA_RATIO y_ratio float

Parameter for the Gaussian distribution for the fuzzy typing. It controls how wide the distribution is on the y-axis, which is the precision of the typing.

DEFAULT_SIGMA_RATIO Source code in kebbie/noise_model.py
class NoiseModel:\n    \"\"\"Class responsible for introducing typo in a clean text.\n\n    Most of typos are introduced on text directly. Then fuzzy typing is\n    applied, using two Gaussian distributions (for x-axis and y-axis),\n    mimicking a user typing on a soft keyboard.\n\n    The ratio arguments are here to choose how wide the Gaussian distribution\n    is. A wider distribution will be less precise, a narrower distribution will\n    be more precise. To test how wide a ratio is, run the following code :\n    ```\n    from scipy.stats import norm\n\n    def compute(x):\n        cdf = norm.cdf(x)\n        return cdf - (1 - cdf)\n\n    print(compute(2.32))    # >>> 0.9796591226625606\n    ```\n    So in this case, a ratio of `2.32` gives a precision of ~98% (a typo will\n    be introduced in 2% of the cases).\n\n    Args:\n        lang (str): Language used.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        common_typos (Optional[Dict[str, List[str]]], optional): Dictionary of\n            common typos. If `None`, common typos are not used.\n        typo_probs (Optional[Dict[str, float]], optional): Probabilities for\n            each type of typos. If `None` is given, `DEFAULT_TYPO_PROBS` is\n            used.\n        x_offset (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. Base position offset on the x-axis.\n        y_offset (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. Base position offset on the y-axis.\n        x_ratio (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. It controls how wide the distribution is on the\n            x-axis, which is the precision of the typing.\n        y_ratio (float, optional): Parameter for the Gaussian distribution for\n            the fuzzy typing. 
It controls how wide the distribution is on the\n            y-axis, which is the precision of the typing.\n    \"\"\"\n\n    def __init__(\n        self,\n        lang: str,\n        custom_keyboard: Dict = None,\n        common_typos: Optional[Dict[str, List[str]]] = None,\n        typo_probs: Optional[Dict[str, float]] = None,\n        x_offset: float = 0,\n        y_offset: float = 0,\n        x_ratio: float = DEFAULT_SIGMA_RATIO,\n        y_ratio: float = DEFAULT_SIGMA_RATIO,\n    ):\n        self.lang = lang\n        self.x_offset, self.y_offset = x_offset, y_offset\n        self.x_ratio, self.y_ratio = x_ratio, y_ratio\n        self.klayout = LayoutHelper(self.lang, custom_keyboard=custom_keyboard, ignore_layers_after=3)\n        self.probs = typo_probs if typo_probs is not None else DEFAULT_TYPO_PROBS\n        self.common_typos = common_typos if common_typos is not None else self._get_common_typos()\n\n    def type_till_space(\n        self,\n        words: List[str],\n    ) -> Tuple[\n        List[Optional[Tuple[float, float]]],\n        str,\n        int,\n        List[Typo],\n    ]:\n        \"\"\"Method introducing typos word by word.\n\n        This method receives a list of words, and type these words while\n        introducing typos.\n        So most of the time, only one word will be typed and the method will\n        return. In some cases, the space is mistyped or deleted, so two words\n        are typed.\n\n        Args:\n            words (List[str]): List of words to type.\n\n        Returns:\n            List of keystrokes (may contains some None).\n            The typed characters as string.\n            The number of words typed.\n            The list of typos introduced in the string typed.\n        \"\"\"\n        all_keystrokes = []\n        all_typed_char = \"\"\n        all_typos = []\n\n        for i, word in enumerate(words):\n            # Some words can't be corrected (numbers, symbols, etc...) 
-> Don't introduce typos\n            error_free = False if self._is_correctable(word) else True\n\n            # Add typos in the word\n            noisy_word, typos = self._introduce_typos(word, error_free=error_free)\n            all_typos += typos\n\n            # Type the word (fuzzy)\n            keystrokes, typed_char, typos = self._fuzzy_type(noisy_word, error_free=error_free)\n            all_keystrokes += keystrokes\n            all_typed_char += typed_char\n            all_typos += typos\n\n            # Then, we try to type a space (separator between words)\n            # TODO : Modify this part for languages without space\n            noisy_space, sp_typo_1 = self._introduce_typos(SPACE)\n            keystrokes, typed_char, sp_typo_2 = self._fuzzy_type(noisy_space)\n\n            # If the space is correctly typed, return now, otherwise type the next word\n            if not sp_typo_1 and not sp_typo_2:\n                break\n            else:\n                all_keystrokes += keystrokes\n                all_typed_char += typed_char\n                all_typos += sp_typo_1 + sp_typo_2\n\n        return all_keystrokes, all_typed_char, i + 1, all_typos\n\n    def swipe(self, word: str) -> Optional[List[Tuple[float, float]]]:\n        \"\"\"Method for creating an artificial swipe gesture given a word.\n\n        Args:\n            word (str): Word to type with a swipe gesture.\n\n        Returns:\n            Positions (x, y) of the generated swipe gesture, or None if the\n                swipe gesture couldn't be created.\n        \"\"\"\n        # Some words can't be corrected (numbers, symbols, etc...) 
-> Don't introduce typos\n        error_free = False if self._is_correctable(word) else True\n\n        # Get the core keystrokes (fuzzy)\n        keystrokes, *_ = self._fuzzy_type(word, error_free=error_free)\n\n        # If we can swipe that word, create the corresponding artificial gesture\n        if all(keystrokes) and len(keystrokes) > 1:\n            return make_swipe_gesture(keystrokes)\n        else:\n            return None\n\n    def _introduce_typos(self, word: str, error_free: bool = False) -> Tuple[str, List[Typo]]:  # noqa: C901\n        \"\"\"Method to introduce typos in a given string.\n\n        Either the word is changed into an existing common typo, or the word is\n        processed as a stream of characters, each character having a chance of\n        being mistyped.\n        This method only add regular typos (deletions, additions, etc...), and\n        is not introducing fuzzy typing.\n\n        Args:\n            word (str): Clean string where to add typos.\n            error_free (bool): If set to True, don't introduce typo. 
Defaults\n                to False.\n\n        Returns:\n            The noisy string.\n            The list of typos introduced.\n        \"\"\"\n        if error_free:\n            return word, []\n\n        # First of all, we either consider the word as a unit and introduce a\n        # language-specific common typo (if available), or treat the word as a\n        # sequence of character, where each character can have a typo\n        if word in self.common_typos and sample(self.probs[Typo.COMMON_TYPO]):\n            # Introduce a common typo\n            return random.choice(self.common_typos[word]), [Typo.COMMON_TYPO]\n\n        # From here, treat the word as a stream of characters, and potentially\n        # add typos for each character\n        noisy_word = \"\"\n        typos = []\n        word_chars = list(word)\n        for i, char in enumerate(word_chars):\n            # First, potentially apply simplifications (removing accent, or\n            # lowercasing an uppercase character)\n            # Note that if the full word is uppercase, we don't apply lowercase\n            # simplification (doesn't feel like a natural typo a user would do)\n            if char in self.klayout.letter_accents and sample(self.probs[Typo.SIMPLIFY_ACCENT]):\n                char = strip_accents(char)\n                typos.append(Typo.SIMPLIFY_ACCENT)\n            if char.isupper() and len(word) > 1 and not word.isupper() and sample(self.probs[Typo.SIMPLIFY_CASE]):\n                char = char.lower()\n                typos.append(Typo.SIMPLIFY_CASE)\n\n            # Check if this character exists on our keyboard\n            try:\n                *_, klayer_id = self.klayout.get_key_info(char)\n                char_is_on_kb = True\n                char_is_on_default_kb = klayer_id == 0\n            except KeyError:\n                char_is_on_kb = char_is_on_default_kb = False\n\n            # Then, add the possible typo depending on the character type\n            events = 
[]\n            is_first_char = bool(i == 0)\n            is_last_char = bool(i >= (len(word_chars) - 1))\n            if char.isnumeric() or not char_is_on_kb:\n                # Don't introduce typos for numbers or symbols that are not on keyboard\n                pass\n            else:\n                if not is_last_char:\n                    # Only transpose char if they are on the same keyboard layer\n                    try:\n                        *_, next_char_klayer_id = self.klayout.get_key_info(word[i + 1])\n                    except KeyError:\n                        next_char_klayer_id = None\n\n                    if klayer_id == next_char_klayer_id:\n                        events.append(Typo.TRANSPOSE_CHAR)\n                if char in self.klayout.spelling_symbols:\n                    events.append(Typo.DELETE_SPELLING_SYMBOL)\n                    events.append(Typo.ADD_SPELLING_SYMBOL)\n                elif char.isspace():\n                    events.append(Typo.DELETE_SPACE)\n                    events.append(Typo.ADD_SPACE)\n                elif char in string.punctuation:\n                    events.append(Typo.DELETE_PUNCTUATION)\n                    events.append(Typo.ADD_PUNCTUATION)\n                elif char_is_on_default_kb:\n                    events.append(Typo.DELETE_CHAR)\n                    events.append(Typo.ADD_CHAR)\n\n            # If it's the last character (and we are not typing a space),\n            # don't add deletions typos, because it's an auto-completion case,\n            # not auto-correction\n            if is_last_char and word != SPACE:\n                events = [e for e in events if e not in DELETIONS]\n\n            # Get the probabilities for these possible events\n            typo_probs = {e: self.probs[e] for e in events}\n            if is_first_char:\n                # Deleting the first character of the word is not so common, update the probabilities accordingly\n                typo_probs = {e: p * 
FRONT_DELETION_MULTIPLIER if e in DELETIONS else p for e, p in typo_probs.items()}\n\n            # And sample one of them\n            typo = sample_among(typo_probs)\n\n            # Process the typo\n            if typo is Typo.TRANSPOSE_CHAR:\n                noisy_char = word_chars[i + 1]\n                word_chars[i + 1] = char\n            elif typo in [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]:\n                noisy_char = \"\"\n            elif typo in [Typo.ADD_SPELLING_SYMBOL, Typo.ADD_SPACE, Typo.ADD_PUNCTUATION, Typo.ADD_CHAR]:\n                noisy_char = f\"{char}{char}\"\n            else:  # No typo\n                noisy_char = char\n\n            noisy_word += noisy_char\n            if typo is not None:\n                typos.append(typo)\n\n        return noisy_word, typos\n\n    def _fuzzy_type(\n        self, word: str, error_free: bool = False\n    ) -> Tuple[List[Optional[Tuple[float, float]]], str, List[Typo]]:\n        \"\"\"Method adding fuzzy typing.\n\n        This method takes a string (potentially already noisy from other type\n        of typos), and fuzzy-type it : simulate a user on a soft-keyboard.\n        This \"fat-finger syndrom\" is simulated using two Gaussian\n        distributions, one for each axis (x, y).\n        This method also returns the generated keystrokes (positions on the\n        keyboard), but only for the default keyboard (ID = 0). Keystrokes from\n        other keyboard are set to None.\n\n        Args:\n            word (str): String to fuzzy-type.\n            error_free (bool): If set to True, don't introduce typo. 
Defaults\n                to False.\n\n        Returns:\n            List of keystrokes.\n            Fuzzy string (corresponding to the keystrokes).\n            List of typos introduced.\n        \"\"\"\n        fuzzy_word = \"\"\n        keystrokes = []\n        typos = []\n\n        # Type word character by character\n        for char in word:\n            try:\n                width, height, x_center, y_center, klayer_id = self.klayout.get_key_info(char)\n            except KeyError:\n                # This character doesn't exist on the current keyboard\n                # Just type it without introducing typo, like if the user copy-pasted it\n                keystrokes.append(None)\n                fuzzy_word += char\n                continue\n\n            # Sample a keystroke for this character\n            # Note that we don't generate typos for characters outside of the default keyboard\n            if error_free or klayer_id != 0:\n                keystroke = (x_center, y_center)\n            else:\n                # Compute mu and sigma for the Normal distribution\n                x_mu = x_center + self.x_offset\n                y_mu = y_center + self.y_offset\n                x_sigma = (width / 2) / self.x_ratio\n                y_sigma = (height / 2) / self.y_ratio\n\n                # Sample a position (x and y)\n                keystroke = (random.gauss(x_mu, x_sigma), random.gauss(y_mu, y_sigma))\n\n            # Convert it back to a character, to see where we tapped\n            fuzzy_char = self.klayout.get_key(keystroke, klayer_id)\n\n            # Save it (save the keystroke only if part of the default keyboard)\n            keystrokes.append(keystroke if klayer_id == 0 else None)\n            fuzzy_word += fuzzy_char\n            if fuzzy_char != char:\n                typos.append(Typo.SUBSTITUTE_CHAR)\n\n        return keystrokes, fuzzy_word, typos\n\n    def _is_correctable(self, word: str) -> bool:\n        \"\"\"Method returning True if 
we expect the given word to be corrected\n        upon typo introduction, False otherwise.\n\n        This is necessary to ensure we don't introduce typos in words that\n        can't be corrected, because if we do, it will be counted as error.\n\n        For now, are considered non-correctable :\n         * Words that don't contains any letter (from Unicode standard)\n\n        Args:\n            word (str): Word to classify as correctable or not.\n\n        Returns:\n            True if the word is correctable (and therefore we can introduce\n            typo), False otherwise.\n        \"\"\"\n        # Use the Unicode category `L` (see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category)\n        return not bool(re.match(r\"^[^\\pL]+$\", word))\n\n    def _get_common_typos(self) -> Dict[str, List[str]]:\n        \"\"\"Retrieve the list (if it exists) of plausible common typos to use\n        when introducing typos.\n\n        Returns:\n            Dictionary where the keys are the correct words and the values are\n                the associated possible typos for this word.\n        \"\"\"\n        plang = self.lang.split(\"-\")[0]\n        common_typos_cache_file = os.path.join(CACHE_DIR, f\"{plang}.json\")\n\n        # Try to access the cached common typos, and if it fails, it means we\n        # don't have it locally\n        try:\n            with open(common_typos_cache_file, \"r\") as f:\n                return json.load(f)\n        except FileNotFoundError:\n            pass\n\n        # File is not cached, download & process the common typos from online\n        os.makedirs(os.path.dirname(common_typos_cache_file), exist_ok=True)\n        typos = defaultdict(list)\n        if plang == \"en\":\n            response = requests.get(TWEET_TYPO_CORPUS_URL)\n            for line in response.text.strip().split(\"\\n\"):\n                typoed_word, correct_word, *_ = line.split(\"\\t\")\n                
typos[correct_word].append(typoed_word)\n        else:\n            return {}\n\n        # Save the retrieved typos in cache\n        with open(common_typos_cache_file, \"w\") as f:\n            json.dump(typos, f, indent=4)\n\n        return typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel.type_till_space","title":"type_till_space(words)","text":"

Method introducing typos word by word.

This method receives a list of words, and types these words while introducing typos. So most of the time, only one word will be typed and the method will return. In some cases, the space is mistyped or deleted, so two words are typed.

Parameters:

Name Type Description Default words List[str]

List of words to type.

required

Returns:

Type Description List[Optional[Tuple[float, float]]]

List of keystrokes (may contain some None).

str

The typed characters as string.

int

The number of words typed.

List[Typo]

The list of typos introduced in the string typed.

Source code in kebbie/noise_model.py
def type_till_space(\n    self,\n    words: List[str],\n) -> Tuple[\n    List[Optional[Tuple[float, float]]],\n    str,\n    int,\n    List[Typo],\n]:\n    \"\"\"Method introducing typos word by word.\n\n    This method receives a list of words, and type these words while\n    introducing typos.\n    So most of the time, only one word will be typed and the method will\n    return. In some cases, the space is mistyped or deleted, so two words\n    are typed.\n\n    Args:\n        words (List[str]): List of words to type.\n\n    Returns:\n        List of keystrokes (may contains some None).\n        The typed characters as string.\n        The number of words typed.\n        The list of typos introduced in the string typed.\n    \"\"\"\n    all_keystrokes = []\n    all_typed_char = \"\"\n    all_typos = []\n\n    for i, word in enumerate(words):\n        # Some words can't be corrected (numbers, symbols, etc...) -> Don't introduce typos\n        error_free = False if self._is_correctable(word) else True\n\n        # Add typos in the word\n        noisy_word, typos = self._introduce_typos(word, error_free=error_free)\n        all_typos += typos\n\n        # Type the word (fuzzy)\n        keystrokes, typed_char, typos = self._fuzzy_type(noisy_word, error_free=error_free)\n        all_keystrokes += keystrokes\n        all_typed_char += typed_char\n        all_typos += typos\n\n        # Then, we try to type a space (separator between words)\n        # TODO : Modify this part for languages without space\n        noisy_space, sp_typo_1 = self._introduce_typos(SPACE)\n        keystrokes, typed_char, sp_typo_2 = self._fuzzy_type(noisy_space)\n\n        # If the space is correctly typed, return now, otherwise type the next word\n        if not sp_typo_1 and not sp_typo_2:\n            break\n        else:\n            all_keystrokes += keystrokes\n            all_typed_char += typed_char\n            all_typos += sp_typo_1 + sp_typo_2\n\n    return all_keystrokes, 
all_typed_char, i + 1, all_typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel.swipe","title":"swipe(word)","text":"

Method for creating an artificial swipe gesture given a word.

Parameters:

Name Type Description Default word str

Word to type with a swipe gesture.

required

Returns:

Type Description Optional[List[Tuple[float, float]]]

Positions (x, y) of the generated swipe gesture, or None if the swipe gesture couldn't be created.

Source code in kebbie/noise_model.py
def swipe(self, word: str) -> Optional[List[Tuple[float, float]]]:\n    \"\"\"Method for creating an artificial swipe gesture given a word.\n\n    Args:\n        word (str): Word to type with a swipe gesture.\n\n    Returns:\n        Positions (x, y) of the generated swipe gesture, or None if the\n            swipe gesture couldn't be created.\n    \"\"\"\n    # Some words can't be corrected (numbers, symbols, etc...) -> Don't introduce typos\n    error_free = False if self._is_correctable(word) else True\n\n    # Get the core keystrokes (fuzzy)\n    keystrokes, *_ = self._fuzzy_type(word, error_free=error_free)\n\n    # If we can swipe that word, create the corresponding artificial gesture\n    if all(keystrokes) and len(keystrokes) > 1:\n        return make_swipe_gesture(keystrokes)\n    else:\n        return None\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._introduce_typos","title":"_introduce_typos(word, error_free=False)","text":"

Method to introduce typos in a given string.

Either the word is changed into an existing common typo, or the word is processed as a stream of characters, each character having a chance of being mistyped. This method only adds regular typos (deletions, additions, etc.), and does not introduce fuzzy typing.

Parameters:

Name Type Description Default word str

Clean string where to add typos.

required error_free bool

If set to True, don't introduce typo. Defaults to False.

False

Returns:

Type Description str

The noisy string.

List[Typo]

The list of typos introduced.

Source code in kebbie/noise_model.py
def _introduce_typos(self, word: str, error_free: bool = False) -> Tuple[str, List[Typo]]:  # noqa: C901\n    \"\"\"Method to introduce typos in a given string.\n\n    Either the word is changed into an existing common typo, or the word is\n    processed as a stream of characters, each character having a chance of\n    being mistyped.\n    This method only add regular typos (deletions, additions, etc...), and\n    is not introducing fuzzy typing.\n\n    Args:\n        word (str): Clean string where to add typos.\n        error_free (bool): If set to True, don't introduce typo. Defaults\n            to False.\n\n    Returns:\n        The noisy string.\n        The list of typos introduced.\n    \"\"\"\n    if error_free:\n        return word, []\n\n    # First of all, we either consider the word as a unit and introduce a\n    # language-specific common typo (if available), or treat the word as a\n    # sequence of character, where each character can have a typo\n    if word in self.common_typos and sample(self.probs[Typo.COMMON_TYPO]):\n        # Introduce a common typo\n        return random.choice(self.common_typos[word]), [Typo.COMMON_TYPO]\n\n    # From here, treat the word as a stream of characters, and potentially\n    # add typos for each character\n    noisy_word = \"\"\n    typos = []\n    word_chars = list(word)\n    for i, char in enumerate(word_chars):\n        # First, potentially apply simplifications (removing accent, or\n        # lowercasing an uppercase character)\n        # Note that if the full word is uppercase, we don't apply lowercase\n        # simplification (doesn't feel like a natural typo a user would do)\n        if char in self.klayout.letter_accents and sample(self.probs[Typo.SIMPLIFY_ACCENT]):\n            char = strip_accents(char)\n            typos.append(Typo.SIMPLIFY_ACCENT)\n        if char.isupper() and len(word) > 1 and not word.isupper() and sample(self.probs[Typo.SIMPLIFY_CASE]):\n            char = char.lower()\n            
typos.append(Typo.SIMPLIFY_CASE)\n\n        # Check if this character exists on our keyboard\n        try:\n            *_, klayer_id = self.klayout.get_key_info(char)\n            char_is_on_kb = True\n            char_is_on_default_kb = klayer_id == 0\n        except KeyError:\n            char_is_on_kb = char_is_on_default_kb = False\n\n        # Then, add the possible typo depending on the character type\n        events = []\n        is_first_char = bool(i == 0)\n        is_last_char = bool(i >= (len(word_chars) - 1))\n        if char.isnumeric() or not char_is_on_kb:\n            # Don't introduce typos for numbers or symbols that are not on keyboard\n            pass\n        else:\n            if not is_last_char:\n                # Only transpose char if they are on the same keyboard layer\n                try:\n                    *_, next_char_klayer_id = self.klayout.get_key_info(word[i + 1])\n                except KeyError:\n                    next_char_klayer_id = None\n\n                if klayer_id == next_char_klayer_id:\n                    events.append(Typo.TRANSPOSE_CHAR)\n            if char in self.klayout.spelling_symbols:\n                events.append(Typo.DELETE_SPELLING_SYMBOL)\n                events.append(Typo.ADD_SPELLING_SYMBOL)\n            elif char.isspace():\n                events.append(Typo.DELETE_SPACE)\n                events.append(Typo.ADD_SPACE)\n            elif char in string.punctuation:\n                events.append(Typo.DELETE_PUNCTUATION)\n                events.append(Typo.ADD_PUNCTUATION)\n            elif char_is_on_default_kb:\n                events.append(Typo.DELETE_CHAR)\n                events.append(Typo.ADD_CHAR)\n\n        # If it's the last character (and we are not typing a space),\n        # don't add deletions typos, because it's an auto-completion case,\n        # not auto-correction\n        if is_last_char and word != SPACE:\n            events = [e for e in events if e not in DELETIONS]\n\n    
    # Get the probabilities for these possible events\n        typo_probs = {e: self.probs[e] for e in events}\n        if is_first_char:\n            # Deleting the first character of the word is not so common, update the probabilities accordingly\n            typo_probs = {e: p * FRONT_DELETION_MULTIPLIER if e in DELETIONS else p for e, p in typo_probs.items()}\n\n        # And sample one of them\n        typo = sample_among(typo_probs)\n\n        # Process the typo\n        if typo is Typo.TRANSPOSE_CHAR:\n            noisy_char = word_chars[i + 1]\n            word_chars[i + 1] = char\n        elif typo in [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]:\n            noisy_char = \"\"\n        elif typo in [Typo.ADD_SPELLING_SYMBOL, Typo.ADD_SPACE, Typo.ADD_PUNCTUATION, Typo.ADD_CHAR]:\n            noisy_char = f\"{char}{char}\"\n        else:  # No typo\n            noisy_char = char\n\n        noisy_word += noisy_char\n        if typo is not None:\n            typos.append(typo)\n\n    return noisy_word, typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._fuzzy_type","title":"_fuzzy_type(word, error_free=False)","text":"

Method adding fuzzy typing.

This method takes a string (potentially already noisy from other types of typos), and fuzzy-types it: it simulates a user on a soft keyboard. This \"fat-finger syndrome\" is simulated using two Gaussian distributions, one for each axis (x, y). This method also returns the generated keystrokes (positions on the keyboard), but only for the default keyboard (ID = 0). Keystrokes from other keyboards are set to None.

Parameters:

Name Type Description Default word str

String to fuzzy-type.

required error_free bool

If set to True, don't introduce typo. Defaults to False.

False

Returns:

Type Description List[Optional[Tuple[float, float]]]

List of keystrokes.

str

Fuzzy string (corresponding to the keystrokes).

List[Typo]

List of typos introduced.

Source code in kebbie/noise_model.py
def _fuzzy_type(\n    self, word: str, error_free: bool = False\n) -> Tuple[List[Optional[Tuple[float, float]]], str, List[Typo]]:\n    \"\"\"Method adding fuzzy typing.\n\n    This method takes a string (potentially already noisy from other type\n    of typos), and fuzzy-type it : simulate a user on a soft-keyboard.\n    This \"fat-finger syndrom\" is simulated using two Gaussian\n    distributions, one for each axis (x, y).\n    This method also returns the generated keystrokes (positions on the\n    keyboard), but only for the default keyboard (ID = 0). Keystrokes from\n    other keyboard are set to None.\n\n    Args:\n        word (str): String to fuzzy-type.\n        error_free (bool): If set to True, don't introduce typo. Defaults\n            to False.\n\n    Returns:\n        List of keystrokes.\n        Fuzzy string (corresponding to the keystrokes).\n        List of typos introduced.\n    \"\"\"\n    fuzzy_word = \"\"\n    keystrokes = []\n    typos = []\n\n    # Type word character by character\n    for char in word:\n        try:\n            width, height, x_center, y_center, klayer_id = self.klayout.get_key_info(char)\n        except KeyError:\n            # This character doesn't exist on the current keyboard\n            # Just type it without introducing typo, like if the user copy-pasted it\n            keystrokes.append(None)\n            fuzzy_word += char\n            continue\n\n        # Sample a keystroke for this character\n        # Note that we don't generate typos for characters outside of the default keyboard\n        if error_free or klayer_id != 0:\n            keystroke = (x_center, y_center)\n        else:\n            # Compute mu and sigma for the Normal distribution\n            x_mu = x_center + self.x_offset\n            y_mu = y_center + self.y_offset\n            x_sigma = (width / 2) / self.x_ratio\n            y_sigma = (height / 2) / self.y_ratio\n\n            # Sample a position (x and y)\n            keystroke = 
(random.gauss(x_mu, x_sigma), random.gauss(y_mu, y_sigma))\n\n        # Convert it back to a character, to see where we tapped\n        fuzzy_char = self.klayout.get_key(keystroke, klayer_id)\n\n        # Save it (save the keystroke only if part of the default keyboard)\n        keystrokes.append(keystroke if klayer_id == 0 else None)\n        fuzzy_word += fuzzy_char\n        if fuzzy_char != char:\n            typos.append(Typo.SUBSTITUTE_CHAR)\n\n    return keystrokes, fuzzy_word, typos\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._is_correctable","title":"_is_correctable(word)","text":"

Method returning True if we expect the given word to be corrected upon typo introduction, False otherwise.

This is necessary to ensure we don't introduce typos in words that can't be corrected, because if we do, it will be counted as error.

For now, are considered non-correctable : * Words that don't contains any letter (from Unicode standard)

Parameters:

Name Type Description Default word str

Word to classify as correctable or not.

required

Returns:

Type Description bool

True if the word is correctable (and therefore we can introduce

bool

typo), False otherwise.

Source code in kebbie/noise_model.py
def _is_correctable(self, word: str) -> bool:\n    \"\"\"Method returning True if we expect the given word to be corrected\n    upon typo introduction, False otherwise.\n\n    This is necessary to ensure we don't introduce typos in words that\n    can't be corrected, because if we do, it will be counted as error.\n\n    For now, are considered non-correctable :\n     * Words that don't contains any letter (from Unicode standard)\n\n    Args:\n        word (str): Word to classify as correctable or not.\n\n    Returns:\n        True if the word is correctable (and therefore we can introduce\n        typo), False otherwise.\n    \"\"\"\n    # Use the Unicode category `L` (see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category)\n    return not bool(re.match(r\"^[^\\pL]+$\", word))\n
"},{"location":"internals/#kebbie.noise_model.NoiseModel._get_common_typos","title":"_get_common_typos()","text":"

Retrieve the list (if it exists) of plausible common typos to use when introducing typos.

Returns:

Type Description Dict[str, List[str]]

Dictionary where the keys are the correct words and the values are the associated possible typos for this word.

Source code in kebbie/noise_model.py
def _get_common_typos(self) -> Dict[str, List[str]]:\n    \"\"\"Retrieve the list (if it exists) of plausible common typos to use\n    when introducing typos.\n\n    Returns:\n        Dictionary where the keys are the correct words and the values are\n            the associated possible typos for this word.\n    \"\"\"\n    plang = self.lang.split(\"-\")[0]\n    common_typos_cache_file = os.path.join(CACHE_DIR, f\"{plang}.json\")\n\n    # Try to access the cached common typos, and if it fails, it means we\n    # don't have it locally\n    try:\n        with open(common_typos_cache_file, \"r\") as f:\n            return json.load(f)\n    except FileNotFoundError:\n        pass\n\n    # File is not cached, download & process the common typos from online\n    os.makedirs(os.path.dirname(common_typos_cache_file), exist_ok=True)\n    typos = defaultdict(list)\n    if plang == \"en\":\n        response = requests.get(TWEET_TYPO_CORPUS_URL)\n        for line in response.text.strip().split(\"\\n\"):\n            typoed_word, correct_word, *_ = line.split(\"\\t\")\n            typos[correct_word].append(typoed_word)\n    else:\n        return {}\n\n    # Save the retrieved typos in cache\n    with open(common_typos_cache_file, \"w\") as f:\n        json.dump(typos, f, indent=4)\n\n    return typos\n
"},{"location":"internals/#oraclepy","title":"oracle.py","text":"

Module defining the Oracle class, which is the class taking care of iterating the dataset, introducing typos using the noise model, and querying the Corrector to correct these typos. Then the scorer is used to compute metrics about the performances, and the results are returned.

"},{"location":"internals/#kebbie.oracle.Oracle","title":"Oracle","text":"

Class that takes care of testing a Corrector. It basically gets clean text data, adds noise to it, send the noisy data to the Corrector, and scores its output.

This class spawn multiple processes to decrease runtime.

Parameters:

Name Type Description Default lang str

Language used.

required test_data Dict[str, List[str]]

List of clean sentences for each domain.

required custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

required track_mistakes bool

Set to True for tracking the most common mistakes. Most common mistakes are added to the results dictionary.

required n_most_common_mistakes int

If track_mistakes is set to True, the top X mistakes to record.

required beta float

Beta to use for computing the F-beta score.

required Source code in kebbie/oracle.py
class Oracle:\n    \"\"\"Class that takes care of testing a Corrector. It basically gets clean\n    text data, adds noise to it, send the noisy data to the Corrector, and\n    scores its output.\n\n    This class spawn multiple processes to decrease runtime.\n\n    Args:\n        lang (str): Language used.\n        test_data (Dict[str, List[str]]): List of clean sentences for each\n            domain.\n        custom_keyboard (Dict): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        track_mistakes (bool): Set to `True` for tracking the most\n            common mistakes. Most common mistakes are added to the results\n            dictionary.\n        n_most_common_mistakes (int): If `track_mistakes` is set to\n            `True`, the top X mistakes to record.\n        beta (float): Beta to use for computing the F-beta score.\n    \"\"\"\n\n    def __init__(\n        self,\n        lang: str,\n        test_data: Dict[str, List[str]],\n        custom_keyboard: Dict,\n        track_mistakes: bool,\n        n_most_common_mistakes: int,\n        beta: float,\n    ) -> None:\n        super().__init__()\n\n        self.lang = lang\n        self.data = test_data\n        self.custom_keyboard = custom_keyboard\n        self.track_mistakes = track_mistakes\n        self.n_most_common_mistakes = n_most_common_mistakes\n        self.beta = beta\n\n    def test(self, corrector: Union[Corrector, List[Corrector]], n_proc: Optional[int], seed: int) -> Dict:\n        \"\"\"Main method, it tests the given Corrector, and returns results as a\n        dictionary.\n\n        This method spawn multiple processes to decrease runtime.\n\n        Args:\n            corrector (Union[Corrector, List[Corrector]]): Corrector to test.\n                If a list of Corrector is given, the argument `n_proc` is\n                ignored, and one corrector is assigned for each process.\n            n_proc 
(Optional[int]): Number of processes to use. If `None`,\n                `os.cpu_count()` is used.\n            seed (int): Seed to use for running the tests.\n\n        Returns:\n            Results formatted in a dictionary.\n        \"\"\"\n        # Initialize a global Scorer here, that will gather counts across processes\n        scorer = Scorer(domains=self.data.keys(), track_mistakes=self.track_mistakes)\n\n        # For multiprocessing\n        n_proc = n_proc if n_proc is not None else os.cpu_count()\n        d_size = sum(len(d) for d in self.data.values())\n\n        # Create the corrector for each process\n        proc_correctors = mp.Queue()\n        if isinstance(corrector, Corrector):\n            for _ in range(n_proc):\n                proc_correctors.put(corrector)\n        else:\n            # If we already have a list of correctors, assign one for each process\n            n_proc = len(corrector)\n            for c in corrector:\n                proc_correctors.put(c)\n\n        with mp.Pool(\n            processes=n_proc,\n            initializer=init_tester,\n            initargs=(tester, self.lang, self.custom_keyboard, proc_correctors, seed, self.track_mistakes),\n        ) as pool, tqdm(total=d_size) as pbar:\n            # Test data is made of several domain, where each domain contains a list of sentences\n            for domain, sentence_list in self.data.items():\n                chunk_size = max(min(CHUNK_SIZE, len(sentence_list) // n_proc), 1)\n                for scr in pool.imap_unordered(tester, sentence_list, chunksize=chunk_size):\n                    scr.set_domain(domain)\n                    scorer.add(scr)\n                    pbar.update(1)\n\n        # Retrieve the results\n        results = scorer.score(beta=self.beta)\n\n        # Then potentially add the most common mistakes\n        if self.track_mistakes:\n            mistakes = {}\n            for task in [\"nwp\", \"acp\", \"acr\"]:\n                task_name = 
{\"nwp\": \"next_word_prediction\", \"acp\": \"auto_completion\", \"acr\": \"auto_correction\"}[task]\n\n                m_count = getattr(scorer, f\"{task}_mistakes\")\n\n                mistakes[task_name] = [(\"Count\", \"Expected\", \"Predictions\", \"Context\")]\n                for m, c in m_count.most_common(self.n_most_common_mistakes):\n                    mistakes[task_name].append((c, m.actual, f\"[{', '.join(m.preds)}]\", m.context))\n\n            results[\"most_common_mistakes\"] = mistakes\n\n        return results\n
"},{"location":"internals/#kebbie.oracle.Oracle.test","title":"test(corrector, n_proc, seed)","text":"

Main method, it tests the given Corrector, and returns results as a dictionary.

This method spawn multiple processes to decrease runtime.

Parameters:

Name Type Description Default corrector Union[Corrector, List[Corrector]]

Corrector to test. If a list of Corrector is given, the argument n_proc is ignored, and one corrector is assigned for each process.

required n_proc Optional[int]

Number of processes to use. If None, os.cpu_count() is used.

required seed int

Seed to use for running the tests.

required

Returns:

Type Description Dict

Results formatted in a dictionary.

Source code in kebbie/oracle.py
def test(self, corrector: Union[Corrector, List[Corrector]], n_proc: Optional[int], seed: int) -> Dict:\n    \"\"\"Main method, it tests the given Corrector, and returns results as a\n    dictionary.\n\n    This method spawn multiple processes to decrease runtime.\n\n    Args:\n        corrector (Union[Corrector, List[Corrector]]): Corrector to test.\n            If a list of Corrector is given, the argument `n_proc` is\n            ignored, and one corrector is assigned for each process.\n        n_proc (Optional[int]): Number of processes to use. If `None`,\n            `os.cpu_count()` is used.\n        seed (int): Seed to use for running the tests.\n\n    Returns:\n        Results formatted in a dictionary.\n    \"\"\"\n    # Initialize a global Scorer here, that will gather counts across processes\n    scorer = Scorer(domains=self.data.keys(), track_mistakes=self.track_mistakes)\n\n    # For multiprocessing\n    n_proc = n_proc if n_proc is not None else os.cpu_count()\n    d_size = sum(len(d) for d in self.data.values())\n\n    # Create the corrector for each process\n    proc_correctors = mp.Queue()\n    if isinstance(corrector, Corrector):\n        for _ in range(n_proc):\n            proc_correctors.put(corrector)\n    else:\n        # If we already have a list of correctors, assign one for each process\n        n_proc = len(corrector)\n        for c in corrector:\n            proc_correctors.put(c)\n\n    with mp.Pool(\n        processes=n_proc,\n        initializer=init_tester,\n        initargs=(tester, self.lang, self.custom_keyboard, proc_correctors, seed, self.track_mistakes),\n    ) as pool, tqdm(total=d_size) as pbar:\n        # Test data is made of several domain, where each domain contains a list of sentences\n        for domain, sentence_list in self.data.items():\n            chunk_size = max(min(CHUNK_SIZE, len(sentence_list) // n_proc), 1)\n            for scr in pool.imap_unordered(tester, sentence_list, chunksize=chunk_size):\n              
  scr.set_domain(domain)\n                scorer.add(scr)\n                pbar.update(1)\n\n    # Retrieve the results\n    results = scorer.score(beta=self.beta)\n\n    # Then potentially add the most common mistakes\n    if self.track_mistakes:\n        mistakes = {}\n        for task in [\"nwp\", \"acp\", \"acr\"]:\n            task_name = {\"nwp\": \"next_word_prediction\", \"acp\": \"auto_completion\", \"acr\": \"auto_correction\"}[task]\n\n            m_count = getattr(scorer, f\"{task}_mistakes\")\n\n            mistakes[task_name] = [(\"Count\", \"Expected\", \"Predictions\", \"Context\")]\n            for m, c in m_count.most_common(self.n_most_common_mistakes):\n                mistakes[task_name].append((c, m.actual, f\"[{', '.join(m.preds)}]\", m.context))\n\n        results[\"most_common_mistakes\"] = mistakes\n\n    return results\n
"},{"location":"internals/#kebbie.oracle.init_tester","title":"init_tester(fn, lang, custom_keyboard, correctors, seed, track_mistakes)","text":"

Function run at process initialization for Tester workers.

Each worker in a Pool will run this function when created. It will instanciate several things needed for testing the given corrector : * A Tokenizer to split sentences into words * A NoiseModel to introduce typos * A Corrector instance, which is the model we want to test

Parameters:

Name Type Description Default fn Callable

Main tester function (instanciated objects will be attached to this function).

required lang str

Language used.

required custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

required correctors Queue

Queue containing list of correctors to test. Each process will get the next corrector available in queue.

required seed int

Base seed to use.

required track_mistakes bool

Set to True for tracking the most common mistakes.

required Source code in kebbie/oracle.py
def init_tester(\n    fn: Callable, lang: str, custom_keyboard: Dict, correctors: mp.Queue, seed: int, track_mistakes: bool\n) -> None:\n    \"\"\"Function run at process initialization for Tester workers.\n\n    Each worker in a Pool will run this function when created. It will\n    instanciate several things needed for testing the given corrector :\n     * A Tokenizer to split sentences into words\n     * A NoiseModel to introduce typos\n     * A Corrector instance, which is the model we want to test\n\n    Args:\n        fn (Callable): Main tester function (instanciated objects will be\n            attached to this function).\n        lang (str): Language used.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        correctors (mp.Queue): Queue containing list of correctors to test.\n            Each process will get the next corrector available in queue.\n        seed (int): Base seed to use.\n        track_mistakes (bool): Set to `True` for tracking the most common\n            mistakes.\n    \"\"\"\n    fn.tokenizer = BasicTokenizer()\n    fn.noisy = NoiseModel(lang, custom_keyboard=custom_keyboard)\n    fn.corrector = correctors.get()\n    fn.base_seed = seed\n    fn.track_mistakes = track_mistakes\n
"},{"location":"internals/#kebbie.oracle.tester","title":"tester(sentence)","text":"

Function to test a given sentence.

It uses the noise model to introduce typos word by word, run the Corrector on various tasks (auto-completion, auto-correction, next-word prediction), and score the results.

Parameters:

Name Type Description Default sentence str

Sentence to use as data for the test.

required

Returns:

Type Description Scorer

Scorer class with the prediction counts for this sentence.

Source code in kebbie/oracle.py
def tester(sentence: str) -> Scorer:\n    \"\"\"Function to test a given sentence.\n\n    It uses the noise model to introduce typos word by word, run the\n    Corrector on various tasks (auto-completion, auto-correction, next-word\n    prediction), and score the results.\n\n    Args:\n        sentence (str): Sentence to use as data for the test.\n\n    Returns:\n        Scorer class with the prediction counts for this sentence.\n    \"\"\"\n    # Set the seed for reproducibility, using the hash of the sentence\n    hsh = int(hashlib.sha256(sentence.encode(\"utf-8\")).hexdigest(), 16)\n    random.seed(tester.base_seed + hsh)\n    rnd_state = random.getstate()\n\n    # Tokenize the sentence into words\n    sentence = tester.tokenizer.preprocess(sentence)\n    words = tester.tokenizer.word_split(sentence)\n\n    context = \"\"\n    # Keep track for predictions counts with a local scorer, for this sentence\n    scorer = Scorer(domains=[None], track_mistakes=tester.track_mistakes)\n    while words and len(context) < MAX_CHAR_PER_SENTENCE:\n        # Before randomly generating typo, set the random state for determinism\n        random.setstate(rnd_state)\n\n        # It's slow to generate swipe gesture every sentence, so run it just sometimes\n        word_to_swipe = words[0]\n        swipe_gesture = tester.noisy.swipe(word_to_swipe) if sample(SWIPE_PROB) else None\n\n        # Generate noisy keystrokes for the next word(s)\n        keystrokes, typed_word, n_word_typed, typos = tester.noisy.type_till_space(words)\n\n        # Get the clean word(s), update the remaining words to type and get the next word\n        actual_word = \" \".join(words[:n_word_typed])\n        words = words[n_word_typed:]\n        next_word = words[0] if len(words) > 0 else None\n\n        # We are done with generating typo, save the random state for the next iteration\n        rnd_state = random.getstate()\n\n        if swipe_gesture:\n            # Call the swipe model\n            preds, 
memory, runtime = tester.corrector.profiled_resolve_swipe(context, swipe_gesture)\n            scorer.swp(word_to_swipe, preds, context=context, memory=memory, runtime=runtime)\n\n        # Call the model for auto-completion (for long enough words)\n        if len(typed_word) > 1 and len(actual_word) > 1:\n            partial_keystrokes, partial_word = sample_partial_word(keystrokes, typed_word, actual_word)\n            preds, memory, runtime = tester.corrector.profiled_auto_complete(context, partial_keystrokes, partial_word)\n            scorer.acp(actual_word, preds, partial_word=partial_word, context=context, memory=memory, runtime=runtime)\n\n        # Call the model for auto-correction\n        preds, memory, runtime = tester.corrector.profiled_auto_correct(context, keystrokes, typed_word)\n        scorer.acr(\n            actual_word, preds, typed_word=typed_word, context=context, typos=typos, memory=memory, runtime=runtime\n        )\n\n        # Update the context for the next iteration (input forcing)\n        context = tester.tokenizer.update_context(context, actual_word)\n\n        # Call the model for next-word prediction\n        if next_word:\n            preds, memory, runtime = tester.corrector.profiled_predict_next_word(context)\n            scorer.nwp(next_word, preds, context=context, memory=memory, runtime=runtime)\n\n    return scorer\n
"},{"location":"internals/#scorerpy","title":"scorer.py","text":"

Module implementing Scorer, a class that keep track of how many errors the model is making, and output various corresponding metrics.

"},{"location":"internals/#kebbie.scorer.Count","title":"Count dataclass","text":"

Structure representing the most basic counts for a task.

It counts : * Number of correct predictions * Number of top3-correct predictions * Total number of predictions

Source code in kebbie/scorer.py
@dataclass\nclass Count:\n    \"\"\"Structure representing the most basic counts for a task.\n\n    It counts :\n    * Number of correct predictions\n    * Number of top3-correct predictions\n    * Total number of predictions\n    \"\"\"\n\n    correct: int = 0  # Number of times the first prediction was correct\n    correct_3: int = 0  # Number of times one of the top-3 predictions was correct\n    total: int = 0  # Total number of predictions\n\n    def __add__(self, count: Count) -> Count:\n        \"\"\"Merge two `Count` instance by adding their counts.\n\n        Args:\n            count (Count): Count instance to add.\n\n        Returns:\n            Merged Count.\n        \"\"\"\n        return Count(\n            correct=self.correct + count.correct,\n            correct_3=self.correct_3 + count.correct_3,\n            total=self.total + count.total,\n        )\n\n    def __mul__(self, proportion: float) -> Count:\n        \"\"\"Multiply the current `Count` instance by a given proportion.\n\n        Args:\n            proportion (float): Proportion to multiply by.\n\n        Returns:\n            Count with the right proportion.\n        \"\"\"\n        return Count(\n            correct=round(self.correct * proportion),\n            correct_3=round(self.correct_3 * proportion),\n            total=round(self.total * proportion),\n        )\n
"},{"location":"internals/#kebbie.scorer.Count.__add__","title":"__add__(count)","text":"

Merge two Count instance by adding their counts.

Parameters:

Name Type Description Default count Count

Count instance to add.

required

Returns:

Type Description Count

Merged Count.

Source code in kebbie/scorer.py
def __add__(self, count: Count) -> Count:\n    \"\"\"Merge two `Count` instance by adding their counts.\n\n    Args:\n        count (Count): Count instance to add.\n\n    Returns:\n        Merged Count.\n    \"\"\"\n    return Count(\n        correct=self.correct + count.correct,\n        correct_3=self.correct_3 + count.correct_3,\n        total=self.total + count.total,\n    )\n
"},{"location":"internals/#kebbie.scorer.Count.__mul__","title":"__mul__(proportion)","text":"

Multiply the current Count instance by a given proportion.

Parameters:

Name Type Description Default proportion float

Proportion to multiply by.

required

Returns:

Type Description Count

Count with the right proportion.

Source code in kebbie/scorer.py
def __mul__(self, proportion: float) -> Count:\n    \"\"\"Multiply the current `Count` instance by a given proportion.\n\n    Args:\n        proportion (float): Proportion to multiply by.\n\n    Returns:\n        Count with the right proportion.\n    \"\"\"\n    return Count(\n        correct=round(self.correct * proportion),\n        correct_3=round(self.correct_3 * proportion),\n        total=round(self.total * proportion),\n    )\n
"},{"location":"internals/#kebbie.scorer.Mistake","title":"Mistake dataclass","text":"

Structure representing a mistake (including the context of the mistake, the expected word and the predictions).

Source code in kebbie/scorer.py
@dataclass(eq=True, frozen=True)\nclass Mistake:\n    \"\"\"Structure representing a mistake (including the context of the mistake,\n    the expected word and the predictions).\n    \"\"\"\n\n    actual: str = field(compare=True)\n    preds: List[str] = field(compare=False)\n    context: str = field(compare=False)\n
"},{"location":"internals/#kebbie.scorer.Scorer","title":"Scorer","text":"

Class keeping track of the predictions and how correct they are, but also computing the associated score for each task after the end of test.

Parameters:

Name Type Description Default domains List[str]

The list of domains in the dataset. The Scorer keeps track of the score for each domain, so that we can spot discrepancies between domain, if any.

required human_readable bool

If set to False, performance metrics (memory, runtime) are kept in their raw, numeral form. If set to True, these are converted to a human readable string.

True track_mistakes bool

Set to True for tracking the most common mistakes.

False Source code in kebbie/scorer.py
class Scorer:\n    \"\"\"Class keeping track of the predictions and how correct they are, but\n    also computing the associated score for each task after the end of test.\n\n    Args:\n        domains (List[str]): The list of domains in the dataset. The Scorer\n            keeps track of the score for each domain, so that we can spot\n            discrepancies between domain, if any.\n        human_readable (bool, optional): If set to `False`, performance metrics\n            (memory, runtime) are kept in their raw, numeral form. If set to\n            `True`, these are converted to a human readable string.\n        track_mistakes (bool, optional): Set to `True` for tracking the most\n            common mistakes.\n    \"\"\"\n\n    def __init__(self, domains: List[str], human_readable: bool = True, track_mistakes: bool = False) -> None:\n        self.human_readable = human_readable\n\n        # For each task, create a dictionary of Counts\n        # Each task has a different structure :\n\n        # Next-word prediction : [domain] -> counts\n        self.nwp_c = dd_x_layers(1)\n\n        # Autocompletion : [domain] -> [typo/no_typo] -> [word_completion_rate] -> counts\n        self.acp_c = dd_x_layers(3)\n\n        # Autocorrection : [domain] -> [typo type/number of typo] -> counts\n        self.acr_c = dd_x_layers(2)\n\n        # Swipe resolution : [domain] -> counts\n        self.swp_c = dd_x_layers(1)\n\n        # Make sure we track each domain (create a 0-Count for each domain)\n        for d in domains:\n            _ = self.nwp_c[d], self.acp_c[d][WITH_TYPO][0], self.acr_c[d][None], self.swp_c[d]\n\n        # Also keep track of memories & runtimes\n        self.nwp_memories = []\n        self.acp_memories = []\n        self.acr_memories = []\n        self.swp_memories = []\n        self.nwp_runtimes = []\n        self.acp_runtimes = []\n        self.acr_runtimes = []\n        self.swp_runtimes = []\n\n        # Optionally track common mistakes\n        
self.track_mistakes = track_mistakes\n        self.nwp_mistakes = Counter()\n        self.acp_mistakes = Counter()\n        self.acr_mistakes = Counter()\n        self.swp_mistakes = Counter()\n\n    def add(self, scorer) -> None:\n        \"\"\"Method to update the current Scorer with the counts from another\n        Scorer.\n\n        Args:\n            scorer (Scorer): Scorer to add.\n        \"\"\"\n\n        def update(d1, d2):\n            for k in d2:\n                if isinstance(d2[k], Count):\n                    d1[k] += d2[k]\n                else:\n                    update(d1[k], d2[k])\n\n        update(self.nwp_c, scorer.nwp_c)\n        update(self.acp_c, scorer.acp_c)\n        update(self.acr_c, scorer.acr_c)\n        update(self.swp_c, scorer.swp_c)\n        self.nwp_memories.extend(scorer.nwp_memories)\n        self.acp_memories.extend(scorer.acp_memories)\n        self.acr_memories.extend(scorer.acr_memories)\n        self.swp_memories.extend(scorer.swp_memories)\n        self.nwp_runtimes.extend(scorer.nwp_runtimes)\n        self.acp_runtimes.extend(scorer.acp_runtimes)\n        self.acr_runtimes.extend(scorer.acr_runtimes)\n        self.swp_runtimes.extend(scorer.swp_runtimes)\n        self.nwp_mistakes.update(scorer.nwp_mistakes)\n        self.acp_mistakes.update(scorer.acp_mistakes)\n        self.acr_mistakes.update(scorer.acr_mistakes)\n        self.swp_mistakes.update(scorer.swp_mistakes)\n\n    def nwp(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        context: str,\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the next-word prediction\n        task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            context (str): The context (previous words in the sentence).\n            memory 
(int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.nwp_memories.append(memory)\n        if runtime >= 0:\n            self.nwp_runtimes.append(runtime)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.nwp_c[domain].correct += 1\n        if true_word in predicted_words[:3]:\n            self.nwp_c[domain].correct_3 += 1\n        else:\n            # If the word is not in the top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.nwp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n        self.nwp_c[domain].total += 1\n\n    def acp(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        partial_word: str,\n        context: str,\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the auto-completion task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            partial_word (str): The input sent to the model (only part of the\n                word to predict, with potential typos).\n            context (str): The context (previous words in the sentence).\n            memory (int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.acp_memories.append(memory)\n        if runtime >= 0:\n            self.acp_runtimes.append(runtime)\n\n        # Check if a typo was 
introduced or not\n        has_typo = WITHOUT_TYPO if true_word.startswith(partial_word) else WITH_TYPO\n\n        # Compute the completion rate\n        completion_rate = round(len(partial_word) / len(true_word), 2)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.acp_c[domain][has_typo][completion_rate].correct += 1\n        if true_word in predicted_words[:3]:\n            self.acp_c[domain][has_typo][completion_rate].correct_3 += 1\n        else:\n            # If the word is not in the top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.acp_mistakes.update(\n                    [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{partial_word}\")]\n                )\n\n        self.acp_c[domain][has_typo][completion_rate].total += 1\n\n    def acr(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        typed_word: str,\n        context: str,\n        typos: List[Typo],\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the auto-correction task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            typed_word (str): The word typed, containing potential typos.\n            context (str): The context (previous words in the sentence).\n            typos (List[Typo]): List of typos introduced.\n            memory (int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.acr_memories.append(memory)\n        if runtime >= 0:\n            self.acr_runtimes.append(runtime)\n\n        # Get the 
type of typo\n        if not typos:\n            typo_type = None\n        elif len(typos) == 1:\n            typo_type = typos[0]\n        else:\n            typo_type = len(typos)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.acr_c[domain][typo_type].correct += 1\n        if true_word in predicted_words[:3]:\n            self.acr_c[domain][typo_type].correct_3 += 1\n        else:\n            # If the word is not in the top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.acr_mistakes.update(\n                    [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{typed_word}\")]\n                )\n\n        self.acr_c[domain][typo_type].total += 1\n\n    def swp(\n        self,\n        true_word: str,\n        predicted_words: List[str],\n        context: str,\n        memory: int,\n        runtime: int,\n        domain: Optional[str] = None,\n    ) -> None:\n        \"\"\"Method used to record a prediction for the swipe resolution task.\n\n        Args:\n            true_word (str): The label (clean word to predict).\n            predicted_words (List[str]): Predictions of the model.\n            context (str): The context (previous words in the sentence).\n            memory (int): Memory consumption for the call of the model.\n            runtime (int): Runtime for the call of the model.\n            domain (str): Domain of this prediction.\n        \"\"\"\n        # Record memory & runtime\n        if memory >= 0:\n            self.swp_memories.append(memory)\n        if runtime >= 0:\n            self.swp_runtimes.append(runtime)\n\n        # Record counts\n        if len(predicted_words) > 0 and predicted_words[0] == true_word:\n            self.swp_c[domain].correct += 1\n        if true_word in predicted_words[:3]:\n            self.swp_c[domain].correct_3 += 1\n        else:\n            # If the word is not in the 
top-3 predictions, this is a mistake\n            if self.track_mistakes:\n                self.swp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n        self.swp_c[domain].total += 1\n\n    def set_domain(self, domain: str) -> None:\n        \"\"\"Method setting the domain for the scores associated with no domain.\n\n        To make it easier to score a single sentence, it's possible to call the\n        scorer without a domain (see signature of `nwp()`, `acp()`, `acr()`).\n        In this case the scores are associated to no domain (`None` key).\n        This method allows the user to set the domain name for these scores\n        with no domain (effectively moving the `None` domain scores to the\n        given domain name).\n\n        Note:\n            If some scores were already linked to the given domain, these\n            scores will be erased (replaced by the scores of the `None`\n            domain).\n\n        Args:\n            domain (str): Domain name to associate the scores to.\n        \"\"\"\n        if None in self.nwp_c:\n            self.nwp_c[domain] = self.nwp_c.pop(None)\n        if None in self.acp_c:\n            self.acp_c[domain] = self.acp_c.pop(None)\n        if None in self.acr_c:\n            self.acr_c[domain] = self.acr_c.pop(None)\n        if None in self.swp_c:\n            self.swp_c[domain] = self.swp_c.pop(None)\n\n    def _score_accuracy(self, c: Count) -> Dict:\n        \"\"\"Helper method to compute the accuracy given a prediction count.\n\n        This method return a dictionary with 3 metrics :\n         * Accuracy\n         * Top3 accuracy\n         * Total number of predictions\n\n        Args:\n            c (Count): Count object to use to compute the accuracy.\n\n        Returns:\n            Dictionary with the computed metrics.\n        \"\"\"\n        return {\n            \"accuracy\": round_to_n(c.correct / c.total) if c.total != 0 else 0,\n            
\"top3_accuracy\": round_to_n(c.correct_3 / c.total) if c.total != 0 else 0,\n            \"n\": c.total,\n        }\n\n    def _score_precision_recall(self, no_typo_c: Count, typo_c: Count, beta: float) -> Dict:\n        \"\"\"Helper method to compute the precision and recall for\n        auto-correction.\n\n        This method return a dictionary with several metrics :\n         * Accuracy\n         * Precision\n         * Recall\n         * F-score\n         * Top3 accuracy\n         * Top3 precision\n         * Top3 recall\n         * Top3 F-score\n         * Number of predictions with a typo\n         * Total number of predictions\n\n        For auto-correction, we need 2 Count objects : the counts of typos, and\n        the counts of non-typo (to compute the True Negative and False Positive\n        metrics).\n\n        Args:\n            no_typo_c (Count): Count object for the predictions where no typo\n                were added.\n            typo_c (Count): Count object for the predictions where typos were\n                added.\n            beta (float): Beta to use for computing the F-beta score.\n\n        Returns:\n            Dictionary with the computed metrics.\n        \"\"\"\n        # The first step is to divide the counts into TN, FP, TP, FN\n        tn = no_typo_c.correct\n        fp = no_typo_c.total - no_typo_c.correct\n        tp = typo_c.correct\n        fn = typo_c.total - typo_c.correct\n\n        tn_3 = no_typo_c.correct_3\n        fp_3 = no_typo_c.total - no_typo_c.correct_3\n        tp_3 = typo_c.correct_3\n        fn_3 = typo_c.total - typo_c.correct_3\n\n        # Then we compute the metrics\n        p = precision(tp=tp, fp=fp)\n        r = recall(tp=tp, fn=fn)\n\n        p_3 = precision(tp=tp_3, fp=fp_3)\n        r_3 = recall(tp=tp_3, fn=fn_3)\n\n        return {\n            \"accuracy\": round_to_n(accuracy(tp=tp, tn=tn, fp=fp, fn=fn)),\n            \"precision\": round_to_n(p),\n            \"recall\": round_to_n(r),\n           
 \"fscore\": round_to_n(fbeta(precision=p, recall=r, beta=beta)),\n            \"top3_accuracy\": round_to_n(accuracy(tp=tp_3, tn=tn_3, fp=fp_3, fn=fn_3)),\n            \"top3_precision\": round_to_n(p_3),\n            \"top3_recall\": round_to_n(r_3),\n            \"top3_fscore\": round_to_n(fbeta(precision=p_3, recall=r_3, beta=beta)),\n            \"n_typo\": typo_c.total,\n            \"n\": no_typo_c.total + typo_c.total,\n        }\n\n    def _score_performances(self, memories: List[int], runtimes: List[int]) -> Dict:\n        \"\"\"Helper method to compute metrics related to the memory & runtime.\n\n        This method returns a dictionary with several metrics :\n         * The mean memory consumption\n         * The min memory consumption\n         * The max memory consumption\n         * The mean running time\n         * The fastest running time\n         * The slowest running time\n\n        Args:\n            memories (List[int]): List of memories consumptions for a\n                specific operation.\n            runtimes (List[int]): List of runtimes for a specific operation.\n\n        Returns:\n            Dictionary with the computed metrics.\n        \"\"\"\n        perf = {\n            \"mean_memory\": stats.mean(memories) if memories else 0,\n            \"min_memory\": min(memories) if memories else 0,\n            \"max_memory\": max(memories) if memories else 0,\n            \"mean_runtime\": stats.mean(runtimes) if runtimes else 0,\n            \"fastest_runtime\": min(runtimes) if runtimes else 0,\n            \"slowest_runtime\": max(runtimes) if runtimes else 0,\n        }\n\n        if self.human_readable:\n            perf = {\n                name: human_readable_memory(x) if name.endswith(\"memory\") else human_readable_runtime(x)\n                for name, x in perf.items()\n            }\n\n        return perf\n\n    def score(self, beta: float = DEFAULT_BETA) -> Dict:  # noqa: C901\n        \"\"\"Method that computes the final 
scores (as well as some alternative\n        metrics that can bring insight in the capabilities of the model), and\n        output these in an organized dictionary.\n\n        Args:\n            beta (float, optional): Beta to use for computing the F-beta score.\n\n        Returns:\n            Dictionary containing the computed scores and metrics for the\n            model tested.\n        \"\"\"\n        # --- Next-word prediction ---\n        # Group scores by domain\n        per = defaultdict(Count)\n        for domain, c in self.nwp_c.items():\n            per[domain] += c\n        total_c = sum(per.values(), Count())\n        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n        # Task results\n        nwp = {\n            \"score\": self._score_accuracy(total_c),\n            \"per_domain\": per_domain,\n            \"performances\": self._score_performances(self.nwp_memories, self.nwp_runtimes),\n        }\n\n        # --- Auto-completion ---\n        # Group scores by domain\n        per = defaultdict(Count)\n        for domain, d1 in self.acp_c.items():\n            for has_typo, d2 in d1.items():\n                for compl_rate, c in d2.items():\n                    per[domain] += c\n        total_c = sum(per.values(), Count())\n        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n        # Group scores by completion rate\n        per = defaultdict(Count)\n        for domain, d1 in self.acp_c.items():\n            for has_typo, d2 in d1.items():\n                for compl_rate, c in d2.items():\n                    per[compl_rate] += c\n        per_compl_rate = {\n            \"<25%\": self._score_accuracy(sum((c for k, c in per.items() if k < 0.25), Count())),\n            \"25%~50%\": self._score_accuracy(sum((c for k, c in per.items() if 0.25 <= k < 0.5), Count())),\n            \"50%~75%\": self._score_accuracy(sum((c for k, c in per.items() if 0.5 <= k < 0.75), Count())),\n            \">75%\": 
self._score_accuracy(sum((c for k, c in per.items() if 0.75 <= k), Count())),\n        }\n\n        # Group scores by with_typo / without_typo\n        per = defaultdict(Count)\n        for domain, d1 in self.acp_c.items():\n            for has_typo, d2 in d1.items():\n                for compl_rate, c in d2.items():\n                    per[has_typo] += c\n        per_other = {k: self._score_accuracy(per[k]) for k in [WITHOUT_TYPO, WITH_TYPO]}\n\n        # Task results\n        acp = {\n            \"score\": self._score_accuracy(total_c),\n            \"per_domain\": per_domain,\n            \"per_completion_rate\": per_compl_rate,\n            \"per_other\": per_other,\n            \"performances\": self._score_performances(self.acp_memories, self.acp_runtimes),\n        }\n\n        # --- Auto-correction ---\n        # Group scores by domain\n        no_typo_per, typo_per = defaultdict(Count), defaultdict(Count)\n        for domain, d1 in self.acr_c.items():\n            for typo, c in d1.items():\n                if typo is None:\n                    no_typo_per[domain] += c\n                else:\n                    typo_per[domain] += c\n        no_typo_total_c = sum(no_typo_per.values(), Count())\n        typo_total_c = sum(typo_per.values(), Count())\n        per_domain = {k: self._score_precision_recall(no_typo_per[k], typo_per[k], beta=beta) for k in no_typo_per}\n\n        # Group scores by typo type\n        no_typo_c, typo_per = Count(), defaultdict(Count)\n        for domain, d1 in self.acr_c.items():\n            for typo, c in d1.items():\n                if typo is None:\n                    no_typo_c += c\n                else:\n                    typo_per[typo] += c\n        # Divide the total count of no-typo into each type of typos with the right proportions\n        no_typo_per = defaultdict(Count, {k: no_typo_c * (c.total / typo_total_c.total) for k, c in typo_per.items()})\n        per_typo_type = {t.name: 
self._score_precision_recall(no_typo_per[t], typo_per[t], beta=beta) for t in Typo}\n        per_n_typo = {\n            \"1\": self._score_precision_recall(\n                sum((c for k, c in no_typo_per.items() if isinstance(k, Typo)), Count()),\n                sum((c for k, c in typo_per.items() if isinstance(k, Typo)), Count()),\n                beta=beta,\n            ),\n            \"2\": self._score_precision_recall(no_typo_per[2], typo_per[2], beta=beta),\n            \"3+\": self._score_precision_recall(\n                sum((c for k, c in no_typo_per.items() if isinstance(k, int) and k > 2), Count()),\n                sum((c for k, c in typo_per.items() if isinstance(k, int) and k > 2), Count()),\n                beta=beta,\n            ),\n        }\n\n        # Task results\n        acr = {\n            \"score\": self._score_precision_recall(no_typo_total_c, typo_total_c, beta=beta),\n            \"per_domain\": per_domain,\n            \"per_typo_type\": per_typo_type,\n            \"per_number_of_typos\": per_n_typo,\n            \"performances\": self._score_performances(self.acr_memories, self.acr_runtimes),\n        }\n\n        # --- Swipe resolution ---\n        # Group scores by domain\n        per = defaultdict(Count)\n        for domain, c in self.swp_c.items():\n            per[domain] += c\n        total_c = sum(per.values(), Count())\n        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n        # Task results\n        swp = {\n            \"score\": self._score_accuracy(total_c),\n            \"per_domain\": per_domain,\n            \"performances\": self._score_performances(self.swp_memories, self.swp_runtimes),\n        }\n\n        # Final results\n        results = {\n            \"next_word_prediction\": nwp,\n            \"auto_completion\": acp,\n            \"auto_correction\": acr,\n            \"swipe_resolution\": swp,\n        }\n\n        # Add the overall score\n        results[\"overall_score\"] = 
one_score(results)\n\n        return results\n
"},{"location":"internals/#kebbie.scorer.Scorer.add","title":"add(scorer)","text":"

Method to update the current Scorer with the counts from another Scorer.

Parameters:

Name Type Description Default scorer Scorer

Scorer to add.

required Source code in kebbie/scorer.py
def add(self, scorer) -> None:\n    \"\"\"Method to update the current Scorer with the counts from another\n    Scorer.\n\n    Args:\n        scorer (Scorer): Scorer to add.\n    \"\"\"\n\n    def update(d1, d2):\n        for k in d2:\n            if isinstance(d2[k], Count):\n                d1[k] += d2[k]\n            else:\n                update(d1[k], d2[k])\n\n    update(self.nwp_c, scorer.nwp_c)\n    update(self.acp_c, scorer.acp_c)\n    update(self.acr_c, scorer.acr_c)\n    update(self.swp_c, scorer.swp_c)\n    self.nwp_memories.extend(scorer.nwp_memories)\n    self.acp_memories.extend(scorer.acp_memories)\n    self.acr_memories.extend(scorer.acr_memories)\n    self.swp_memories.extend(scorer.swp_memories)\n    self.nwp_runtimes.extend(scorer.nwp_runtimes)\n    self.acp_runtimes.extend(scorer.acp_runtimes)\n    self.acr_runtimes.extend(scorer.acr_runtimes)\n    self.swp_runtimes.extend(scorer.swp_runtimes)\n    self.nwp_mistakes.update(scorer.nwp_mistakes)\n    self.acp_mistakes.update(scorer.acp_mistakes)\n    self.acr_mistakes.update(scorer.acr_mistakes)\n    self.swp_mistakes.update(scorer.swp_mistakes)\n
"},{"location":"internals/#kebbie.scorer.Scorer.nwp","title":"nwp(true_word, predicted_words, context, memory, runtime, domain=None)","text":"

Method used to record a prediction for the next-word prediction task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required context str

The context (previous words in the sentence).

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def nwp(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    context: str,\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the next-word prediction\n    task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        context (str): The context (previous words in the sentence).\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.nwp_memories.append(memory)\n    if runtime >= 0:\n        self.nwp_runtimes.append(runtime)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.nwp_c[domain].correct += 1\n    if true_word in predicted_words[:3]:\n        self.nwp_c[domain].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.nwp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n    self.nwp_c[domain].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.acp","title":"acp(true_word, predicted_words, partial_word, context, memory, runtime, domain=None)","text":"

Method used to record a prediction for the auto-completion task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required partial_word str

The input sent to the model (only part of the word to predict, with potential typos).

required context str

The context (previous words in the sentence).

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def acp(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    partial_word: str,\n    context: str,\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the auto-completion task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        partial_word (str): The input sent to the model (only part of the\n            word to predict, with potential typos).\n        context (str): The context (previous words in the sentence).\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.acp_memories.append(memory)\n    if runtime >= 0:\n        self.acp_runtimes.append(runtime)\n\n    # Check if a typo was introduced or not\n    has_typo = WITHOUT_TYPO if true_word.startswith(partial_word) else WITH_TYPO\n\n    # Compute the completion rate\n    completion_rate = round(len(partial_word) / len(true_word), 2)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.acp_c[domain][has_typo][completion_rate].correct += 1\n    if true_word in predicted_words[:3]:\n        self.acp_c[domain][has_typo][completion_rate].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.acp_mistakes.update(\n                [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{partial_word}\")]\n            )\n\n    self.acp_c[domain][has_typo][completion_rate].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.acr","title":"acr(true_word, predicted_words, typed_word, context, typos, memory, runtime, domain=None)","text":"

Method used to record a prediction for the auto-correction task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required typed_word str

The word typed, containing potential typos.

required context str

The context (previous words in the sentence).

required typos List[Typo]

List of typos introduced.

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def acr(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    typed_word: str,\n    context: str,\n    typos: List[Typo],\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the auto-correction task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        typed_word (str): The word typed, containing potential typos.\n        context (str): The context (previous words in the sentence).\n        typos (List[Typo]): List of typos introduced.\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.acr_memories.append(memory)\n    if runtime >= 0:\n        self.acr_runtimes.append(runtime)\n\n    # Get the type of typo\n    if not typos:\n        typo_type = None\n    elif len(typos) == 1:\n        typo_type = typos[0]\n    else:\n        typo_type = len(typos)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.acr_c[domain][typo_type].correct += 1\n    if true_word in predicted_words[:3]:\n        self.acr_c[domain][typo_type].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.acr_mistakes.update(\n                [Mistake(actual=true_word, preds=predicted_words[:3], context=f\"{context}{typed_word}\")]\n            )\n\n    self.acr_c[domain][typo_type].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.swp","title":"swp(true_word, predicted_words, context, memory, runtime, domain=None)","text":"

Method used to record a prediction for the swipe resolution task.

Parameters:

Name Type Description Default true_word str

The label (clean word to predict).

required predicted_words List[str]

Predictions of the model.

required context str

The context (previous words in the sentence).

required memory int

Memory consumption for the call of the model.

required runtime int

Runtime for the call of the model.

required domain str

Domain of this prediction.

None Source code in kebbie/scorer.py
def swp(\n    self,\n    true_word: str,\n    predicted_words: List[str],\n    context: str,\n    memory: int,\n    runtime: int,\n    domain: Optional[str] = None,\n) -> None:\n    \"\"\"Method used to record a prediction for the swipe resolution task.\n\n    Args:\n        true_word (str): The label (clean word to predict).\n        predicted_words (List[str]): Predictions of the model.\n        context (str): The context (previous words in the sentence).\n        memory (int): Memory consumption for the call of the model.\n        runtime (int): Runtime for the call of the model.\n        domain (str): Domain of this prediction.\n    \"\"\"\n    # Record memory & runtime\n    if memory >= 0:\n        self.swp_memories.append(memory)\n    if runtime >= 0:\n        self.swp_runtimes.append(runtime)\n\n    # Record counts\n    if len(predicted_words) > 0 and predicted_words[0] == true_word:\n        self.swp_c[domain].correct += 1\n    if true_word in predicted_words[:3]:\n        self.swp_c[domain].correct_3 += 1\n    else:\n        # If the word is not in the top-3 predictions, this is a mistake\n        if self.track_mistakes:\n            self.swp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])\n\n    self.swp_c[domain].total += 1\n
"},{"location":"internals/#kebbie.scorer.Scorer.set_domain","title":"set_domain(domain)","text":"

Method setting the domain for the scores associated with no domain.

To make it easier to score a single sentence, it's possible to call the scorer without a domain (see signature of nwp(), acp(), acr()). In this case the scores are associated to no domain (None key). This method allows the user to set the domain name for these scores with no domain (effectively moving the None domain scores to the given domain name).

Note

If some scores were already linked to the given domain, these scores will be erased (replaced by the scores of the None domain).

Parameters:

Name Type Description Default domain str

Domain name to associate the scores to.

required Source code in kebbie/scorer.py
def set_domain(self, domain: str) -> None:\n    \"\"\"Method setting the domain for the scores associated with no domain.\n\n    To make it easier to score a single sentence, it's possible to call the\n    scorer without a domain (see signature of `nwp()`, `acp()`, `acr()`).\n    In this case the scores are associated to no domain (`None` key).\n    This method allows the user to set the domain name for these scores\n    with no domain (effectively moving the `None` domain scores to the\n    given domain name).\n\n    Note:\n        If some scores were already linked to the given domain, these\n        scores will be erased (replaced by the scores of the `None`\n        domain).\n\n    Args:\n        domain (str): Domain name to associate the scores to.\n    \"\"\"\n    if None in self.nwp_c:\n        self.nwp_c[domain] = self.nwp_c.pop(None)\n    if None in self.acp_c:\n        self.acp_c[domain] = self.acp_c.pop(None)\n    if None in self.acr_c:\n        self.acr_c[domain] = self.acr_c.pop(None)\n    if None in self.swp_c:\n        self.swp_c[domain] = self.swp_c.pop(None)\n
"},{"location":"internals/#kebbie.scorer.Scorer._score_accuracy","title":"_score_accuracy(c)","text":"

Helper method to compute the accuracy given a prediction count.

This method return a dictionary with 3 metrics
  • Accuracy
  • Top3 accuracy
  • Total number of predictions

Parameters:

Name Type Description Default c Count

Count object to use to compute the accuracy.

required

Returns:

Type Description Dict

Dictionary with the computed metrics.

Source code in kebbie/scorer.py
def _score_accuracy(self, c: Count) -> Dict:\n    \"\"\"Helper method to compute the accuracy given a prediction count.\n\n    This method return a dictionary with 3 metrics :\n     * Accuracy\n     * Top3 accuracy\n     * Total number of predictions\n\n    Args:\n        c (Count): Count object to use to compute the accuracy.\n\n    Returns:\n        Dictionary with the computed metrics.\n    \"\"\"\n    return {\n        \"accuracy\": round_to_n(c.correct / c.total) if c.total != 0 else 0,\n        \"top3_accuracy\": round_to_n(c.correct_3 / c.total) if c.total != 0 else 0,\n        \"n\": c.total,\n    }\n
"},{"location":"internals/#kebbie.scorer.Scorer._score_precision_recall","title":"_score_precision_recall(no_typo_c, typo_c, beta)","text":"

Helper method to compute the precision and recall for auto-correction.

This method return a dictionary with several metrics
  • Accuracy
  • Precision
  • Recall
  • F-score
  • Top3 accuracy
  • Top3 precision
  • Top3 recall
  • Top3 F-score
  • Number of predictions with a typo
  • Total number of predictions

For auto-correction, we need 2 Count objects : the counts of typos, and the counts of non-typo (to compute the True Negative and False Positive metrics).

Parameters:

Name Type Description Default no_typo_c Count

Count object for the predictions where no typo were added.

required typo_c Count

Count object for the predictions where typos were added.

required beta float

Beta to use for computing the F-beta score.

required

Returns:

Type Description Dict

Dictionary with the computed metrics.

Source code in kebbie/scorer.py
def _score_precision_recall(self, no_typo_c: Count, typo_c: Count, beta: float) -> Dict:\n    \"\"\"Helper method to compute the precision and recall for\n    auto-correction.\n\n    This method return a dictionary with several metrics :\n     * Accuracy\n     * Precision\n     * Recall\n     * F-score\n     * Top3 accuracy\n     * Top3 precision\n     * Top3 recall\n     * Top3 F-score\n     * Number of predictions with a typo\n     * Total number of predictions\n\n    For auto-correction, we need 2 Count objects : the counts of typos, and\n    the counts of non-typo (to compute the True Negative and False Positive\n    metrics).\n\n    Args:\n        no_typo_c (Count): Count object for the predictions where no typo\n            were added.\n        typo_c (Count): Count object for the predictions where typos were\n            added.\n        beta (float): Beta to use for computing the F-beta score.\n\n    Returns:\n        Dictionary with the computed metrics.\n    \"\"\"\n    # The first step is to divide the counts into TN, FP, TP, FN\n    tn = no_typo_c.correct\n    fp = no_typo_c.total - no_typo_c.correct\n    tp = typo_c.correct\n    fn = typo_c.total - typo_c.correct\n\n    tn_3 = no_typo_c.correct_3\n    fp_3 = no_typo_c.total - no_typo_c.correct_3\n    tp_3 = typo_c.correct_3\n    fn_3 = typo_c.total - typo_c.correct_3\n\n    # Then we compute the metrics\n    p = precision(tp=tp, fp=fp)\n    r = recall(tp=tp, fn=fn)\n\n    p_3 = precision(tp=tp_3, fp=fp_3)\n    r_3 = recall(tp=tp_3, fn=fn_3)\n\n    return {\n        \"accuracy\": round_to_n(accuracy(tp=tp, tn=tn, fp=fp, fn=fn)),\n        \"precision\": round_to_n(p),\n        \"recall\": round_to_n(r),\n        \"fscore\": round_to_n(fbeta(precision=p, recall=r, beta=beta)),\n        \"top3_accuracy\": round_to_n(accuracy(tp=tp_3, tn=tn_3, fp=fp_3, fn=fn_3)),\n        \"top3_precision\": round_to_n(p_3),\n        \"top3_recall\": round_to_n(r_3),\n        \"top3_fscore\": round_to_n(fbeta(precision=p_3, 
recall=r_3, beta=beta)),\n        \"n_typo\": typo_c.total,\n        \"n\": no_typo_c.total + typo_c.total,\n    }\n
"},{"location":"internals/#kebbie.scorer.Scorer._score_performances","title":"_score_performances(memories, runtimes)","text":"

Helper method to compute metrics related to the memory & runtime.

This method returns a dictionary with several metrics
  • The mean memory consumption
  • The min memory consumption
  • The max memory consumption
  • The mean running time
  • The fastest running time
  • The slowest running time

Parameters:

Name Type Description Default memories List[int]

List of memories consumptions for a specific operation.

required runtimes List[int]

List of runtimes for a specific operation.

required

Returns:

Type Description Dict

Dictionary with the computed metrics.

Source code in kebbie/scorer.py
def _score_performances(self, memories: List[int], runtimes: List[int]) -> Dict:\n    \"\"\"Helper method to compute metrics related to the memory & runtime.\n\n    This method returns a dictionary with several metrics :\n     * The mean memory consumption\n     * The min memory consumption\n     * The max memory consumption\n     * The mean running time\n     * The fastest running time\n     * The slowest running time\n\n    Args:\n        memories (List[int]): List of memories consumptions for a\n            specific operation.\n        runtimes (List[int]): List of runtimes for a specific operation.\n\n    Returns:\n        Dictionary with the computed metrics.\n    \"\"\"\n    perf = {\n        \"mean_memory\": stats.mean(memories) if memories else 0,\n        \"min_memory\": min(memories) if memories else 0,\n        \"max_memory\": max(memories) if memories else 0,\n        \"mean_runtime\": stats.mean(runtimes) if runtimes else 0,\n        \"fastest_runtime\": min(runtimes) if runtimes else 0,\n        \"slowest_runtime\": max(runtimes) if runtimes else 0,\n    }\n\n    if self.human_readable:\n        perf = {\n            name: human_readable_memory(x) if name.endswith(\"memory\") else human_readable_runtime(x)\n            for name, x in perf.items()\n        }\n\n    return perf\n
"},{"location":"internals/#kebbie.scorer.Scorer.score","title":"score(beta=DEFAULT_BETA)","text":"

Method that computes the final scores (as well as some alternative metrics that can bring insight in the capabilities of the model), and output these in an organized dictionary.

Parameters:

Name Type Description Default beta float

Beta to use for computing the F-beta score.

DEFAULT_BETA

Returns:

Type Description Dict

Dictionary containing the computed scores and metrics for the

Dict

model tested.

Source code in kebbie/scorer.py
def score(self, beta: float = DEFAULT_BETA) -> Dict:  # noqa: C901\n    \"\"\"Method that computes the final scores (as well as some alternative\n    metrics that can bring insight in the capabilities of the model), and\n    output these in an organized dictionary.\n\n    Args:\n        beta (float, optional): Beta to use for computing the F-beta score.\n\n    Returns:\n        Dictionary containing the computed scores and metrics for the\n        model tested.\n    \"\"\"\n    # --- Next-word prediction ---\n    # Group scores by domain\n    per = defaultdict(Count)\n    for domain, c in self.nwp_c.items():\n        per[domain] += c\n    total_c = sum(per.values(), Count())\n    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n    # Task results\n    nwp = {\n        \"score\": self._score_accuracy(total_c),\n        \"per_domain\": per_domain,\n        \"performances\": self._score_performances(self.nwp_memories, self.nwp_runtimes),\n    }\n\n    # --- Auto-completion ---\n    # Group scores by domain\n    per = defaultdict(Count)\n    for domain, d1 in self.acp_c.items():\n        for has_typo, d2 in d1.items():\n            for compl_rate, c in d2.items():\n                per[domain] += c\n    total_c = sum(per.values(), Count())\n    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n    # Group scores by completion rate\n    per = defaultdict(Count)\n    for domain, d1 in self.acp_c.items():\n        for has_typo, d2 in d1.items():\n            for compl_rate, c in d2.items():\n                per[compl_rate] += c\n    per_compl_rate = {\n        \"<25%\": self._score_accuracy(sum((c for k, c in per.items() if k < 0.25), Count())),\n        \"25%~50%\": self._score_accuracy(sum((c for k, c in per.items() if 0.25 <= k < 0.5), Count())),\n        \"50%~75%\": self._score_accuracy(sum((c for k, c in per.items() if 0.5 <= k < 0.75), Count())),\n        \">75%\": self._score_accuracy(sum((c for k, c in per.items() if 0.75 <= 
k), Count())),\n    }\n\n    # Group scores by with_typo / without_typo\n    per = defaultdict(Count)\n    for domain, d1 in self.acp_c.items():\n        for has_typo, d2 in d1.items():\n            for compl_rate, c in d2.items():\n                per[has_typo] += c\n    per_other = {k: self._score_accuracy(per[k]) for k in [WITHOUT_TYPO, WITH_TYPO]}\n\n    # Task results\n    acp = {\n        \"score\": self._score_accuracy(total_c),\n        \"per_domain\": per_domain,\n        \"per_completion_rate\": per_compl_rate,\n        \"per_other\": per_other,\n        \"performances\": self._score_performances(self.acp_memories, self.acp_runtimes),\n    }\n\n    # --- Auto-correction ---\n    # Group scores by domain\n    no_typo_per, typo_per = defaultdict(Count), defaultdict(Count)\n    for domain, d1 in self.acr_c.items():\n        for typo, c in d1.items():\n            if typo is None:\n                no_typo_per[domain] += c\n            else:\n                typo_per[domain] += c\n    no_typo_total_c = sum(no_typo_per.values(), Count())\n    typo_total_c = sum(typo_per.values(), Count())\n    per_domain = {k: self._score_precision_recall(no_typo_per[k], typo_per[k], beta=beta) for k in no_typo_per}\n\n    # Group scores by typo type\n    no_typo_c, typo_per = Count(), defaultdict(Count)\n    for domain, d1 in self.acr_c.items():\n        for typo, c in d1.items():\n            if typo is None:\n                no_typo_c += c\n            else:\n                typo_per[typo] += c\n    # Divide the total count of no-typo into each type of typos with the right proportions\n    no_typo_per = defaultdict(Count, {k: no_typo_c * (c.total / typo_total_c.total) for k, c in typo_per.items()})\n    per_typo_type = {t.name: self._score_precision_recall(no_typo_per[t], typo_per[t], beta=beta) for t in Typo}\n    per_n_typo = {\n        \"1\": self._score_precision_recall(\n            sum((c for k, c in no_typo_per.items() if isinstance(k, Typo)), Count()),\n            
sum((c for k, c in typo_per.items() if isinstance(k, Typo)), Count()),\n            beta=beta,\n        ),\n        \"2\": self._score_precision_recall(no_typo_per[2], typo_per[2], beta=beta),\n        \"3+\": self._score_precision_recall(\n            sum((c for k, c in no_typo_per.items() if isinstance(k, int) and k > 2), Count()),\n            sum((c for k, c in typo_per.items() if isinstance(k, int) and k > 2), Count()),\n            beta=beta,\n        ),\n    }\n\n    # Task results\n    acr = {\n        \"score\": self._score_precision_recall(no_typo_total_c, typo_total_c, beta=beta),\n        \"per_domain\": per_domain,\n        \"per_typo_type\": per_typo_type,\n        \"per_number_of_typos\": per_n_typo,\n        \"performances\": self._score_performances(self.acr_memories, self.acr_runtimes),\n    }\n\n    # --- Swipe resolution ---\n    # Group scores by domain\n    per = defaultdict(Count)\n    for domain, c in self.swp_c.items():\n        per[domain] += c\n    total_c = sum(per.values(), Count())\n    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}\n\n    # Task results\n    swp = {\n        \"score\": self._score_accuracy(total_c),\n        \"per_domain\": per_domain,\n        \"performances\": self._score_performances(self.swp_memories, self.swp_runtimes),\n    }\n\n    # Final results\n    results = {\n        \"next_word_prediction\": nwp,\n        \"auto_completion\": acp,\n        \"auto_correction\": acr,\n        \"swipe_resolution\": swp,\n    }\n\n    # Add the overall score\n    results[\"overall_score\"] = one_score(results)\n\n    return results\n
"},{"location":"internals/#kebbie.scorer.dd_x_layers","title":"dd_x_layers(n_layers=1)","text":"

Helper function for creating a nested defaultdict, with a specified number of nesting levels. The end object is a Count.

Parameters:

Name Type Description Default n_layers int

Number of layers for the defaultdict.

1

Returns:

Type Description defaultdict

Created nested defaultdict.

Source code in kebbie/scorer.py
def dd_x_layers(n_layers: int = 1) -> defaultdict:\n    \"\"\"Helper function for creating a nested defaultdict, with a specified\n    number of nest level. The end object is a Count.\n\n    Args:\n        n_layers (int): Number of layer for the defaultdict.\n\n    Returns:\n        Created nested defaultdict.\n    \"\"\"\n    assert n_layers > 0, f\"A default dict have at least 1 layer ({n_layers} given)\"\n    if n_layers == 1:\n        return defaultdict(Count)\n    else:\n        return defaultdict(partial(dd_x_layers, n_layers=n_layers - 1))\n
"},{"location":"internals/#kebbie.scorer.one_score","title":"one_score(results)","text":"

One Score to rule them all, One Score to find them, One Score to bring them all and in the darkness bind them.

This function is here to gather the various testing metrics of a JET file in a single number, to easily compare models.

We take a single metric for each task, and weight them based on the importance of the task (these metrics already have the same scale : between 0 and 1).

For NWP and ACP we take a top-3 metric, because these tasks usually involve a user action from a proposed list. For ACR and SWP, we take a top-1 metric, since usually it's automatically applied without user input.

Parameters:

Name Type Description Default results Dict

Testing results. Should be a dictionary containing all the metrics (used to compute the one score).

required

Returns:

Type Description float

One score, computed from the results given.

Source code in kebbie/scorer.py
def one_score(results: Dict) -> float:\n    \"\"\"One Score to rule them all, One Score to find them, One Score to bring\n    them all and in the darkness bind them.\n\n    This function is here to gather the various testing metrics of a JET file\n    in a single number, to easily compare models.\n\n    We take a single metric for each task, and weight them based on the\n    importance of the task (these metrics already have the same scale : between\n    0 and 1).\n\n    For NWP and ACP we take a top-3 metric, because these tasks usually involve\n    a user action from a proposed list. For ACR and SWP, we take a top-1\n    metric, since usually it's automatically applied without user input.\n\n    Args:\n        results (Dict): Testing results. Should be a dictionary containing all\n            the metrics (used to compute the one score).\n\n    Returns:\n        One score, computed from the results given.\n    \"\"\"\n    nwp = results[\"next_word_prediction\"][\"score\"][\"top3_accuracy\"]\n    acp = results[\"auto_completion\"][\"score\"][\"top3_accuracy\"]\n    acr = results[\"auto_correction\"][\"score\"][\"fscore\"]\n    swp = results[\"swipe_resolution\"][\"score\"][\"accuracy\"]\n\n    return 0.15 * nwp + 0.2 * acp + 0.4 * acr + 0.25 * swp\n
"},{"location":"internals/#tokenizerpy","title":"tokenizer.py","text":"

Module defining BasicTokenizer, very basic tokenizer to separate a sentence into words.

"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer","title":"BasicTokenizer","text":"

A basic tokenizer, used for regular Latin languages. This tokenizer simply uses a space as the word separator. Since it is used for testing only, we don't need to care about punctuation, etc...

Source code in kebbie/tokenizer.py
class BasicTokenizer:\n    \"\"\"A basic tokenizer, used for regular latin languages.\n    This tokenizer simply use space as word separator. Since it is used for\n    testing only, we don't need to care about punctuations, etc...\n    \"\"\"\n\n    def preprocess(self, sentence: str) -> str:\n        \"\"\"Method for simple preprocessing.\n\n        The goal of this function is not to provide an extensive and clean\n        preprocessing. The goal is just to normalize some characters (that\n        are not in our keyboard, so the user can't officially type them) into\n        their normal counterpart, that are in the keyboard.\n\n        Args:\n            sentence (str): String to normalize.\n\n        Returns:\n            Normalized string.\n        \"\"\"\n        # Replace things that are like \"\n        sentence = sentence.replace(\"\u201c\", '\"').replace(\"\u201d\", '\"').replace(\"\u201e\", '\"')\n\n        # Replace things that are like '\n        sentence = sentence.replace(\"\u2019\", \"'\").replace(\"\u02bb\", \"'\").replace(\"\u2018\", \"'\").replace(\"\u00b4\", \"'\").replace(\"\u02bc\", \"'\")\n\n        # Replace things that are like -\n        sentence = sentence.replace(\"\u2013\", \"-\").replace(\"\u2014\", \"-\").replace(\"\u2011\", \"-\").replace(\"\u2212\", \"-\").replace(\"\u30fc\", \"-\")\n\n        # Replace other punctuations\n        sentence = sentence.replace(\"\u2026\", \"...\").replace(\"\u201a\", \",\").replace(\"\u2024\", \".\")\n\n        # TODO: Each keyboard has its own way to deal with punctuation\n        # (applying auto-correction or not, displaying next-word prediction or\n        # not, etc...). 
So for now we just get rid of the punctuations, it's a\n        # convenient shortcut and it's fair to all keyboards.\n        # Eventually we should find a better way to deal with that.\n        sentence = re.sub(r\"\\s*\\.+\\s*\", \" \", sentence)\n        sentence = re.sub(r\"\\s*[,:;\\(\\)\\\"!?\\[\\]\\{\\}~]\\s*\", \" \", sentence)\n\n        return sentence\n\n    def word_split(self, sentence: str) -> List[str]:\n        \"\"\"Method for splitting a sentence into a list of words.\n\n        Args:\n            sentence (str): Sentence to split.\n\n        Returns:\n            List of words from the sentence.\n        \"\"\"\n        return sentence.strip().split()\n\n    def update_context(self, context: str, word: str) -> str:\n        \"\"\"Method for updating a context, given a word that was typed.\n\n        Args:\n            context (str): Existing context.\n            word (str): Word being typed.\n\n        Returns:\n            Updated context.\n        \"\"\"\n        return context + word + \" \"\n
"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer.preprocess","title":"preprocess(sentence)","text":"

Method for simple preprocessing.

The goal of this function is not to provide an extensive and clean preprocessing. The goal is just to normalize some characters (that are not in our keyboard, so the user can't officially type them) into their normal counterpart, that are in the keyboard.

Parameters:

Name Type Description Default sentence str

String to normalize.

required

Returns:

Type Description str

Normalized string.

Source code in kebbie/tokenizer.py
def preprocess(self, sentence: str) -> str:\n    \"\"\"Method for simple preprocessing.\n\n    The goal of this function is not to provide an extensive and clean\n    preprocessing. The goal is just to normalize some characters (that\n    are not in our keyboard, so the user can't officially type them) into\n    their normal counterpart, that are in the keyboard.\n\n    Args:\n        sentence (str): String to normalize.\n\n    Returns:\n        Normalized string.\n    \"\"\"\n    # Replace things that are like \"\n    sentence = sentence.replace(\"\u201c\", '\"').replace(\"\u201d\", '\"').replace(\"\u201e\", '\"')\n\n    # Replace things that are like '\n    sentence = sentence.replace(\"\u2019\", \"'\").replace(\"\u02bb\", \"'\").replace(\"\u2018\", \"'\").replace(\"\u00b4\", \"'\").replace(\"\u02bc\", \"'\")\n\n    # Replace things that are like -\n    sentence = sentence.replace(\"\u2013\", \"-\").replace(\"\u2014\", \"-\").replace(\"\u2011\", \"-\").replace(\"\u2212\", \"-\").replace(\"\u30fc\", \"-\")\n\n    # Replace other punctuations\n    sentence = sentence.replace(\"\u2026\", \"...\").replace(\"\u201a\", \",\").replace(\"\u2024\", \".\")\n\n    # TODO: Each keyboard has its own way to deal with punctuation\n    # (applying auto-correction or not, displaying next-word prediction or\n    # not, etc...). So for now we just get rid of the punctuations, it's a\n    # convenient shortcut and it's fair to all keyboards.\n    # Eventually we should find a better way to deal with that.\n    sentence = re.sub(r\"\\s*\\.+\\s*\", \" \", sentence)\n    sentence = re.sub(r\"\\s*[,:;\\(\\)\\\"!?\\[\\]\\{\\}~]\\s*\", \" \", sentence)\n\n    return sentence\n
"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer.word_split","title":"word_split(sentence)","text":"

Method for splitting a sentence into a list of words.

Parameters:

Name Type Description Default sentence str

Sentence to split.

required

Returns:

Type Description List[str]

List of words from the sentence.

Source code in kebbie/tokenizer.py
def word_split(self, sentence: str) -> List[str]:\n    \"\"\"Method for splitting a sentence into a list of words.\n\n    Args:\n        sentence (str): Sentence to split.\n\n    Returns:\n        List of words from the sentence.\n    \"\"\"\n    return sentence.strip().split()\n
"},{"location":"internals/#kebbie.tokenizer.BasicTokenizer.update_context","title":"update_context(context, word)","text":"

Method for updating a context, given a word that was typed.

Parameters:

Name Type Description Default context str

Existing context.

required word str

Word being typed.

required

Returns:

Type Description str

Updated context.

Source code in kebbie/tokenizer.py
def update_context(self, context: str, word: str) -> str:\n    \"\"\"Method for updating a context, given a word that was typed.\n\n    Args:\n        context (str): Existing context.\n        word (str): Word being typed.\n\n    Returns:\n        Updated context.\n    \"\"\"\n    return context + word + \" \"\n
"},{"location":"internals/#utilspy","title":"utils.py","text":"

Various utils function used by kebbie.

"},{"location":"internals/#kebbie.utils.profile_fn","title":"profile_fn(fn, *args, **kwargs)","text":"

Profile the runtime and memory usage of the given function.

Note that it will only account for memory allocated by Python (if you use a library in C/C++ that does its own allocation, it won't report it).

Parameters:

Name Type Description Default fn Callable

Function to profile.

required *args Any

Positional arguments to pass to the given function.

() **kwargs Any

Keywords arguments to pass to the given function.

{}

Returns:

Type Description Any

The return value of the function called.

int

The memory usage (in bytes).

int

The runtime (in nano seconds).

Source code in kebbie/utils.py
def profile_fn(fn: Callable, *args: Any, **kwargs: Any) -> Tuple[Any, int, int]:\n    \"\"\"Profile the runtime and memory usage of the given function.\n\n    Note that it will only account for memory allocated by python (if you use\n    a library in C/C++ that does its own allocation, it won't report it).\n\n    Args:\n        fn (Callable): Function to profile.\n        *args: Positional arguments to pass to the given function.\n        **kwargs: Keywords arguments to pass to the given function.\n\n    Returns:\n        The return value of the function called.\n        The memory usage (in bytes).\n        The runtime (in nano seconds).\n    \"\"\"\n    tracemalloc.start()\n    t0 = time.time()\n\n    result = fn(*args, **kwargs)\n\n    runtime = time.time() - t0\n    _, memory = tracemalloc.get_traced_memory()\n\n    return result, memory, runtime * SEC_TO_NANOSEC\n
"},{"location":"internals/#kebbie.utils.euclidian_dist","title":"euclidian_dist(p1, p2)","text":"

Function computing the euclidian distance between 2 points.

Parameters:

Name Type Description Default p1 Tuple[float, float]

Point 1.

required p2 Tuple[float, float]

Point 2.

required

Returns:

Type Description float

Euclidian distance between the 2 given points.

Source code in kebbie/utils.py
def euclidian_dist(p1: Tuple[float, float], p2: Tuple[float, float]) -> float:\n    \"\"\"Function computing the euclidian distance between 2 points.\n\n    Args:\n        p1 (Tuple[float, float]): Point 1.\n        p2 (Tuple[float, float]): Point 2.\n\n    Returns:\n        Euclidian distance between the 2 given points.\n    \"\"\"\n    return math.sqrt(sum((a - b) ** 2 for a, b in zip(p1, p2)))\n
"},{"location":"internals/#kebbie.utils.load_keyboard","title":"load_keyboard(lang='en-US')","text":"

Load the keyboard data for the given language.

For now, only en-US is supported.

Parameters:

Name Type Description Default lang str

Language of the keyboard to load.

'en-US'

Returns:

Type Description Dict

The keyboard data.

Source code in kebbie/utils.py
def load_keyboard(lang: str = \"en-US\") -> Dict:\n    \"\"\"Load the keyboard data for the given language.\n\n    For now, only `en-US` is supported.\n\n    Args:\n        lang (str, optional): Language of the keyboard to load.\n\n    Returns:\n        The keyboard data.\n    \"\"\"\n    layout_folder = Path(__file__).parent / \"layouts\"\n    with open(layout_folder / f\"{lang}.json\", \"r\") as f:\n        keyboard = json.load(f)\n    return keyboard\n
"},{"location":"internals/#kebbie.utils.strip_accents","title":"strip_accents(s)","text":"

Util function for removing accents from a given string.

Parameters:

Name Type Description Default s str

Accented string.

required

Returns:

Type Description str

Same string, without accent.

Source code in kebbie/utils.py
def strip_accents(s: str) -> str:\n    \"\"\"Util function for removing accents from a given string.\n\n    Args:\n        s (str): Accented string.\n\n    Returns:\n        Same string, without accent.\n    \"\"\"\n    nfkd_form = unicodedata.normalize(\"NFKD\", s)\n    return \"\".join([c for c in nfkd_form if not unicodedata.combining(c)])\n
"},{"location":"internals/#kebbie.utils.sample","title":"sample(proba)","text":"

Simple function to sample an event with the given probability. For example, calling sample(0.95) will return True in 95% of cases, and False in 5% of cases.

Parameters:

Name Type Description Default proba float

Probability of the event to happen. Should be between 0 and 1 (included).

required

Returns:

Type Description bool

True if the event was sampled, False otherwise.

Source code in kebbie/utils.py
def sample(proba: float) -> bool:\n    \"\"\"Simple function to sample an event with the given probability.\n    For example, calling `sample(0.95)` will return `True` in 95% cases, and\n    `False` in 5% cases.\n\n    Args:\n        proba (float): Probability of the event to happen. Should be between 0\n            and 1 (included).\n\n    Returns:\n        `True` if the event was sampled, `False` otherwise.\n    \"\"\"\n    assert 0 <= proba <= 1, f\"`{proba}` is not a valid probability (should be between 0 and 1)\"\n    if proba == 0:\n        return False\n    elif proba == 1:\n        return True\n    else:\n        return random.choices([True, False], weights=[proba, 1 - proba])[0]\n
"},{"location":"internals/#kebbie.utils.sample_among","title":"sample_among(probs, with_none=True)","text":"

Function that samples an event among several with different probabilities.

Parameters:

Name Type Description Default probs Dict[Any, float]

Dictionary representing the different events and their probabilities. Each probability should be above 0 and their sum should not exceed 1.

required with_none bool

If set to True, add a None option (no event sampled).

True

Returns:

Type Description Any

The corresponding key of the event sampled.

Source code in kebbie/utils.py
def sample_among(probs: Dict[Any, float], with_none: bool = True) -> Any:\n    \"\"\"Function that sample an event among several with different\n    probabilities.\n\n    Args:\n        probs (Dict[Any, float]): Dictionary representing the different events\n            and their probabilities. Each probability should be above 0 and\n            their sum should not exceed 1.\n        with_none (bool): If set to `True`, add a `None` option (no event\n            sampled).\n\n    Returns:\n        The corresponding key of the event sampled.\n    \"\"\"\n    options = list(probs.keys())\n    weights = list(probs.values())\n    assert (\n        all(w >= 0 for w in weights) and sum(weights) <= 1\n    ), \"The numbers given are not a probability (should be above 0 and their sum should not exceed 1)\"\n\n    if with_none:\n        options.append(None)\n        weights.append(1 - sum(weights))\n\n    return random.choices(options, weights=weights)[0]\n
"},{"location":"internals/#kebbie.utils.sample_partial_word","title":"sample_partial_word(keystrokes, word, true_word)","text":"

Sample a partial word from a given word, and extract the corresponding keystrokes as well.

Sampling is done with increasing weights (more chances to sample a longer list). For example if the list represents the keystrokes of "abcdef", the probabilities are as follows: * "a" : 1/15 * "ab" : 2/15 * "abc" : 3/15 * "abcd" : 4/15 * "abcde" : 5/15

Parameters:

Name Type Description Default keystrokes List[Optional[Tuple[float, float]]]

Complete list of keystrokes, representing a full word.

required word str

The word corresponding to the keystrokes.

required true_word str

Actual word (without typo). Necessary to ensure the sampled keystrokes are partial.

required

Returns:

Type Description List[Optional[Tuple[float, float]]]

The partial list of keystrokes (sampled from the given word).

str

The partial word (sampled from the given word).

Source code in kebbie/utils.py
def sample_partial_word(\n    keystrokes: List[Optional[Tuple[float, float]]], word: str, true_word: str\n) -> Tuple[List[Optional[Tuple[float, float]]], str]:\n    \"\"\"Sample a partial word from a given word, and extract the corresponding\n    keystrokes as well.\n\n    Sampling is done with increasing weights (more chances to sample a longer\n    list). For example if the list represent the keystrokes of \"abcdef\", the\n    probabilities are as follow:\n     * \"a\" :     1/15\n     * \"ab\" :    2/15\n     * \"abc\" :   3/15\n     * \"abcd\" :  4/15\n     * \"abcde\" : 5/15\n\n    Args:\n        keystrokes (List[Optional[Tuple[float, float]]]): Complete list of\n            keystrokes, representing a full word.\n        word (str): The word corresponding to the keystrokes.\n        true_word (str): Actual word (without typo). Necessary to ensure the\n            sampled keystrokes are partial.\n\n    Returns:\n        The partial list of keystrokes (sampled from the given word).\n        The partial word (sampled from the given word).\n    \"\"\"\n    r = range(1, min(len(true_word), len(word)))\n    s = random.choices(r, weights=r)[0]\n    return keystrokes[:s], word[:s]\n
"},{"location":"internals/#kebbie.utils.accuracy","title":"accuracy(tp, tn, fp, fn)","text":"

Function computing the accuracy.

Parameters:

Name Type Description Default tp int

Number of True Positive.

required tn int

Number of True Negative.

required fp int

Number of False Positive.

required fn int

Number of False Negative.

required

Returns:

Type Description float

Accuracy.

Source code in kebbie/utils.py
def accuracy(tp: int, tn: int, fp: int, fn: int) -> float:\n    \"\"\"Function computing the accuracy.\n\n    Args:\n        tp (int): Number of True Positive.\n        tn (int): Number of True Negative.\n        fp (int): Number of False Positive.\n        fn (int): Number of False Negative.\n\n    Returns:\n        Accuracy.\n    \"\"\"\n    try:\n        return (tp + tn) / (tp + tn + fp + fn)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.precision","title":"precision(tp, fp)","text":"

Function computing the precision.

Parameters:

Name Type Description Default tp int

Number of True Positive.

required fp int

Number of False Positive.

required

Returns:

Type Description float

Precision.

Source code in kebbie/utils.py
def precision(tp: int, fp: int) -> float:\n    \"\"\"Function computing the precision.\n\n    Args:\n        tp (int): Number of True Positive.\n        fp (int): Number of False Positive.\n\n    Returns:\n        Precision.\n    \"\"\"\n    try:\n        return tp / (tp + fp)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.recall","title":"recall(tp, fn)","text":"

Function computing the recall.

Parameters:

Name Type Description Default tp int

Number of True Positive.

required fn int

Number of False Negative.

required

Returns:

Type Description float

Recall.

Source code in kebbie/utils.py
def recall(tp: int, fn: int) -> float:\n    \"\"\"Function computing the recall.\n\n    Args:\n        tp (int): Number of True Positive.\n        fn (int): Number of False Negative.\n\n    Returns:\n        Recall.\n    \"\"\"\n    try:\n        return tp / (tp + fn)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.fbeta","title":"fbeta(precision, recall, beta=1)","text":"

Function computing the F-beta score (which is a generalization of the F1 score).

The value of Beta changes how much we weight recall versus precision
  • For beta=0.5, Precision is twice as important as Recall
  • For beta=2, Recall is twice as important as Precision

Parameters:

Name Type Description Default precision float

Precision.

required recall float

Recall.

required beta float

Beta factor.

1

Returns:

Type Description float

F-beta score.

Source code in kebbie/utils.py
def fbeta(precision: float, recall: float, beta: float = 1) -> float:\n    \"\"\"Function computing the F-beta score (which is a generalization of the\n    F1 score).\n\n    The value of Beta changes how much we weight recall versus precision:\n     * For beta=0.5, Precision is twice as important as Recall\n     * For beta=2, Recall is twice as important as Precision\n\n    Args:\n        precision (float): Precision.\n        recall (float): Recall.\n        beta (float): Beta factor.\n\n    Returns:\n        F-beta score.\n    \"\"\"\n    try:\n        return (1 + beta**2) * precision * recall / (beta**2 * precision + recall)\n    except ZeroDivisionError:\n        return 0\n
"},{"location":"internals/#kebbie.utils.round_to_n","title":"round_to_n(x, n=2)","text":"

Util function to round a given number to n significant digits.

Parameters:

Name Type Description Default x float

Number to round.

required n int

Number of significant digits to use.

2

Returns:

Type Description float

Rounded number.

Source code in kebbie/utils.py
def round_to_n(x: float, n: int = 2) -> float:\n    \"\"\"Util function to round a given number to n significant digits.\n\n    Args:\n        x (float): Number to round.\n        n (int): Number of significant digits to use.\n\n    Returns:\n        Rounded number.\n    \"\"\"\n    return round(x, -int(math.floor(math.log10(x))) + (n - 1)) if x != 0 else 0\n
"},{"location":"internals/#kebbie.utils.human_readable_memory","title":"human_readable_memory(x)","text":"

Given a number in bytes, return a human-readable string of this number, with the right unit.

Parameters:

Name Type Description Default x int

Number in bytes.

required

Returns:

Type Description str

Human-readable version of the given number, with the right unit.

Source code in kebbie/utils.py
def human_readable_memory(x: int) -> str:\n    \"\"\"Given a number in bytes, return a human-readable string of this number,\n    with the right unit.\n\n    Args:\n        x (int): Number in bytes.\n\n    Returns:\n        Human-readable version of the given number, with the right unit.\n    \"\"\"\n    x = round_to_n(x, n=3)\n    for unit in [\"B\", \"KB\", \"MB\", \"GB\"]:\n        if x < 1000:\n            return f\"{x:g} {unit}\"\n\n        x /= 1000\n    return f\"{x:g} TB\"\n
"},{"location":"internals/#kebbie.utils.human_readable_runtime","title":"human_readable_runtime(x)","text":"

Given a number in nanoseconds, return a human-readable string of this number, with the right unit.

Parameters:

Name Type Description Default x int

Number in nanoseconds.

required

Returns:

Type Description str

Human-readable version of the given number, with the right unit.

Source code in kebbie/utils.py
def human_readable_runtime(x: int) -> str:\n    \"\"\"Given a number in nanoseconds, return a human-readable string of this\n    number, with the right unit.\n\n    Args:\n        x (int): Number in nanoseconds.\n\n    Returns:\n        Human-readable version of the given number, with the right unit.\n    \"\"\"\n    x = round_to_n(x, n=3)\n    for unit in [\"ns\", \"\u03bcs\", \"ms\"]:\n        if x < 1000:\n            return f\"{x:g} {unit}\"\n\n        x /= 1000\n    return f\"{x:g} s\"\n
"},{"location":"internals/#kebbie.utils.get_soda_dataset","title":"get_soda_dataset(max_sentences=2000, seed=31)","text":"

Load the SODA dataset.

Parameters:

Name Type Description Default max_sentences int

Maximum number of sentences in total in the dataset. They will be shared across domain (50% from the narrative domain, 50% from the dialogue domain).

2000 seed int

Seed to use when shuffling the dataset (since we don't use the whole dataset, it's better to shuffle it before extracting the X first sentences).

31

Returns:

Type Description Dict[str, List[str]]

The dataset, separated into two domains: narrative and dialogue.

Source code in kebbie/utils.py
def get_soda_dataset(max_sentences: int = 2_000, seed: int = 31) -> Dict[str, List[str]]:\n    \"\"\"Load the SODA dataset.\n\n    Args:\n        max_sentences (int, optional): Maximum number of sentences in total in\n            the dataset. They will be shared across domain (50% from the\n            `narrative` domain, 50% from the `dialogue` domain).\n        seed (int, optional): Seed to use when shuffling the dataset (since we\n            don't use the whole dataset, it's better to shuffle it before\n            extracting the X first sentences).\n\n    Returns:\n        The dataset, separated into two domains : narrative and dialogue.\n    \"\"\"\n    data = {\"narrative\": [], \"dialogue\": []}\n    max_domain_sentences = max_sentences // 2\n\n    hf_dataset = datasets.load_dataset(\"allenai/soda\", split=\"test\")\n    hf_dataset = hf_dataset.shuffle(seed=seed)\n\n    for sample in hf_dataset:\n        if len(data[\"narrative\"]) >= max_domain_sentences and len(data[\"dialogue\"]) >= max_domain_sentences:\n            break\n\n        if len(data[\"narrative\"]) < max_domain_sentences:\n            data[\"narrative\"].append(sample[\"narrative\"])\n\n        for sen in sample[\"dialogue\"]:\n            if len(data[\"dialogue\"]) < max_domain_sentences:\n                data[\"dialogue\"].append(sen)\n\n    return data\n
"},{"location":"internals/#constants","title":"Constants","text":""},{"location":"internals/#__init__py","title":"__init__.py","text":""},{"location":"internals/#kebbie.SUPPORTED_LANG","title":"SUPPORTED_LANG = ['en-US']","text":""},{"location":"internals/#kebbie.N_MOST_COMMON_MISTAKES","title":"N_MOST_COMMON_MISTAKES = 1000","text":""},{"location":"internals/#kebbie.DEFAULT_SEED","title":"DEFAULT_SEED = 42","text":""},{"location":"internals/#emulatorpy_1","title":"emulator.py","text":""},{"location":"internals/#kebbie.emulator.ANDROID","title":"ANDROID = 'android'","text":""},{"location":"internals/#kebbie.emulator.IOS","title":"IOS = 'ios'","text":""},{"location":"internals/#kebbie.emulator.GBOARD","title":"GBOARD = 'gboard'","text":""},{"location":"internals/#kebbie.emulator.TAPPA","title":"TAPPA = 'tappa'","text":""},{"location":"internals/#kebbie.emulator.FLEKSY","title":"FLEKSY = 'fleksy'","text":""},{"location":"internals/#kebbie.emulator.KBKITPRO","title":"KBKITPRO = 'kbkitpro'","text":""},{"location":"internals/#kebbie.emulator.KBKITOSS","title":"KBKITOSS = 'kbkitoss'","text":""},{"location":"internals/#kebbie.emulator.SWIFTKEY","title":"SWIFTKEY = 'swiftkey'","text":""},{"location":"internals/#kebbie.emulator.KEYBOARD_PACKAGE","title":"KEYBOARD_PACKAGE = {GBOARD: 'com.google.android.inputmethod.latin', SWIFTKEY: 'com.touchtype.swiftkey', TAPPA: 'com.tappa.keyboard'}","text":""},{"location":"internals/#kebbie.emulator.ANDROID_CAPABILITIES","title":"ANDROID_CAPABILITIES = {'platformName': 'android', 'automationName': 'UiAutomator2', 'enableMultiWindows': True, 'deviceName': 'test', 'newCommandTimeout': 3600}","text":""},{"location":"internals/#kebbie.emulator.IOS_CAPABILITIES","title":"IOS_CAPABILITIES = {'platformName': 'iOS', 'automationName': 'XCUITest', 'udid': 'auto', 'xcodeOrgId': '8556JTA4X4', 'xcodeSigningId': 'iPhone Developer', 'useNewWDA': False, 'usePrebuiltWdDA': True, 'startIWDP': True, 'bundleId': 'com.apple.MobileSMS', 'newCommandTimeout': 
3600}","text":""},{"location":"internals/#kebbie.emulator.BROWSER_PAD_URL","title":"BROWSER_PAD_URL = 'https://www.justnotepad.com'","text":""},{"location":"internals/#kebbie.emulator.ANDROID_TYPING_FIELD_CLASS_NAME","title":"ANDROID_TYPING_FIELD_CLASS_NAME = 'android.widget.EditText'","text":""},{"location":"internals/#kebbie.emulator.DUMMY_RECIPIENT","title":"DUMMY_RECIPIENT = '0'","text":""},{"location":"internals/#kebbie.emulator.IOS_TYPING_FIELD_ID","title":"IOS_TYPING_FIELD_ID = 'messageBodyField'","text":""},{"location":"internals/#kebbie.emulator.IOS_START_CHAT_CLASS_NAME","title":"IOS_START_CHAT_CLASS_NAME = 'XCUIElementTypeCell'","text":""},{"location":"internals/#kebbie.emulator.TESSERACT_CONFIG","title":"TESSERACT_CONFIG = '-c tessedit_char_blacklist=0123456789\u201d:!@\u00b7$%&/()=.\u00bf?'","text":""},{"location":"internals/#kebbie.emulator.PREDICTION_DELAY","title":"PREDICTION_DELAY = 0.4","text":""},{"location":"internals/#kebbie.emulator.CONTENT_TO_IGNORE","title":"CONTENT_TO_IGNORE = ['Sticker', 'GIF', 'Clipboard', 'Settings', 'Back', 'Switch input method', 'Paste item', 'Close', 'paintpalette', 'Search Document', 'Microphone', 'gearshape', 'Next Locale', 'paintpalette', 'EmojiCategories/smileysAndPeople', 'EmojiCategories/animalsAndNature', 'EmojiCategories/foodAndDrink', 'EmojiCategories/activity', 'EmojiCategories/travelAndPlaces', 'EmojiCategories/objects', 'EmojiCategories/symbols', 'EmojiCategories/flags', 'Add', 'And', 'Are', '\u201cA\u201d', '\ud83d\ude80']","text":""},{"location":"internals/#kebbie.emulator.CONTENT_TO_RENAME","title":"CONTENT_TO_RENAME = {'Shift': 'shift', 'Delete': 'backspace', 'Backspace': 'backspace', 'Space': 'spacebar', 'space': 'spacebar', 'Emoji button': 'smiley', 'Emoji': 'smiley', 'Keyboard Type - emojis': 'smiley', 'Search': 'enter', 'return': 'enter', 'Enter': 'enter', 'Symbol keyboard': 'numbers', 'Symbols': 'numbers', 'Symbols and numbers': 'numbers', 'Keyboard Type - numeric': 'numbers', 'Voice input': 
'mic', ',, alternatives available, Voice typing, long press to activate': 'mic', 'Close features menu': 'magic', 'Open features menu': 'magic', 'underline': '_', '&amp;': '&', 'ampersand': '&', 'Dash': '-', 'Plus': '+', 'Left parenthesis': '(', 'Right parenthesis': ')', 'slash': '/', 'Apostrophe': \"'\", 'Colon': ':', 'Semicolon': ';', 'Exclamation': '!', 'Question mark': '?', 'Letter keyboard': 'letters', 'Letters': 'letters', 'Keyboard Type - auto': 'letters', 'Digit keyboard': 'numbers', 'More symbols': 'shift', 'Keyboard Type - symbolic': 'shift', 'Double tap for uppercase': 'shift', 'Double tap for caps lock': 'shift', 'capital Q': 'Q', 'capital W': 'W', 'capital E': 'E', 'capital R': 'R', 'capital T': 'T', 'capital Y': 'Y', 'capital U': 'U', 'capital I': 'I', 'Capital I': 'I', 'capital O': 'O', 'capital P': 'P', 'capital A': 'A', 'capital S': 'S', 'capital D': 'D', 'capital F': 'F', 'capital G': 'G', 'capital H': 'H', 'capital J': 'J', 'capital K': 'K', 'capital L': 'L', 'capital Z': 'Z', 'capital X': 'X', 'capital C': 'C', 'capital V': 'V', 'capital B': 'B', 'capital N': 'N', 'capital M': 'M'}","text":""},{"location":"internals/#kebbie.emulator.FLEKSY_LAYOUT","title":"FLEKSY_LAYOUT = {'keyboard_frame': [0, 517, 393, 266], 'lowercase': {'q': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'w': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'e': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'r': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 't': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'y': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'u': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'i': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'o': 
[0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'p': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'a': [0.05740740740740741, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 's': [0.15555555555555556, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'd': [0.25555555555555554, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'f': [0.35462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'g': [0.4546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'h': [0.5546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'j': [0.6546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'k': [0.7546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'l': [0.8555555555555555, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'z': [0.15555555555555556, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'x': [0.25555555555555554, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'c': [0.35462962962962963, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'v': [0.4546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'b': [0.5546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'n': [0.6546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'm': [0.7546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'backspace': [0.8555555555555555, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'numbers': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 
0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}, 'uppercase': {'Q': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'W': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'E': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'R': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'T': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'Y': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'U': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'I': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'O': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'P': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'A': [0.05740740740740741, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'S': [0.15555555555555556, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'D': [0.25555555555555554, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'F': [0.35462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'G': [0.4546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'H': [0.5546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'J': [0.6546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'K': [0.7546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'L': [0.8555555555555555, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 
0.1361111111111111, 0.1643835616438356], 'Z': [0.15555555555555556, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'X': [0.25555555555555554, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'C': [0.35462962962962963, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'V': [0.4546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'B': [0.5546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'N': [0.6546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'M': [0.7546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'backspace': [0.8555555555555555, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'numbers': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}, 'numbers': {'1': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '2': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '3': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '4': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '5': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '6': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '7': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '8': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '9': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 
0.1643835616438356], '0': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '-': [0.007407407407407408, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '/': [0.10462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ':': [0.20462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ';': [0.30462962962962964, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '(': [0.4046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ')': [0.5046296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '$': [0.6046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '&': [0.7046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '@': [0.8046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '\"': [0.9046296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], ',': [0.3101851851851852, 0.5994520547945206, 0.12, 0.1643835616438356], '?': [0.44044444444444447, 0.5994520547945206, 0.12, 0.1643835616438356], '!': [0.5707037037037037, 0.5994520547945206, 0.12, 0.1643835616438356], \"'\": [0.705962962962963, 0.5994520547945206, 0.12, 0.1643835616438356], 'backspace': [0.8551851851851852, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'letters': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 
0.1643835616438356]}}","text":""},{"location":"internals/#gesturepy_1","title":"gesture.py","text":""},{"location":"internals/#kebbie.gesture.MAX_RADIUS","title":"MAX_RADIUS = 16","text":""},{"location":"internals/#kebbie.gesture.MIN_N_POINTS_PER_DIST","title":"MIN_N_POINTS_PER_DIST = 0.1","text":""},{"location":"internals/#kebbie.gesture.MAX_N_POINTS_PER_DIST","title":"MAX_N_POINTS_PER_DIST = 0.25","text":""},{"location":"internals/#kebbie.gesture.MIN_ACCELERATION","title":"MIN_ACCELERATION = 0.2","text":""},{"location":"internals/#kebbie.gesture.MAX_ACCELERATION","title":"MAX_ACCELERATION = 0.5","text":""},{"location":"internals/#layoutpy_1","title":"layout.py","text":""},{"location":"internals/#kebbie.layout.SPACE","title":"SPACE = 'spacebar'","text":""},{"location":"internals/#kebbie.layout.POINT","title":"POINT = '.'","text":""},{"location":"internals/#kebbie.layout.N_ACCENT_PER_LINE","title":"N_ACCENT_PER_LINE = 4","text":""},{"location":"internals/#noise_modelpy_1","title":"noise_model.py","text":""},{"location":"internals/#kebbie.noise_model.DEFAULT_TYPO_PROBS","title":"DEFAULT_TYPO_PROBS = {Typo.TRANSPOSE_CHAR: 0.01, Typo.DELETE_SPELLING_SYMBOL: 0.1, Typo.ADD_SPELLING_SYMBOL: 0, Typo.DELETE_SPACE: 0.01, Typo.ADD_SPACE: 0, Typo.DELETE_PUNCTUATION: 0, Typo.ADD_PUNCTUATION: 0, Typo.DELETE_CHAR: 0.005, Typo.ADD_CHAR: 0.005, Typo.SIMPLIFY_ACCENT: 0.08, Typo.SIMPLIFY_CASE: 0.08, Typo.COMMON_TYPO: 0.05}","text":""},{"location":"internals/#kebbie.noise_model.SPACE","title":"SPACE = ' '","text":""},{"location":"internals/#kebbie.noise_model.DELETIONS","title":"DELETIONS = [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]","text":""},{"location":"internals/#kebbie.noise_model.FRONT_DELETION_MULTIPLIER","title":"FRONT_DELETION_MULTIPLIER = 0.36","text":""},{"location":"internals/#kebbie.noise_model.DEFAULT_SIGMA_RATIO","title":"DEFAULT_SIGMA_RATIO = 
3","text":""},{"location":"internals/#kebbie.noise_model.CACHE_DIR","title":"CACHE_DIR = os.path.expanduser('~/.cache/common_typos/')","text":""},{"location":"internals/#kebbie.noise_model.TWEET_TYPO_CORPUS_URL","title":"TWEET_TYPO_CORPUS_URL = 'https://luululu.com/tweet/typo-corpus-r1.txt'","text":""},{"location":"internals/#oraclepy_1","title":"oracle.py","text":""},{"location":"internals/#kebbie.oracle.CHUNK_SIZE","title":"CHUNK_SIZE = 10","text":""},{"location":"internals/#kebbie.oracle.MAX_CHAR_PER_SENTENCE","title":"MAX_CHAR_PER_SENTENCE = 256","text":""},{"location":"internals/#kebbie.oracle.SWIPE_PROB","title":"SWIPE_PROB = 0.01","text":""},{"location":"internals/#scorerpy_1","title":"scorer.py","text":""},{"location":"internals/#kebbie.scorer.DEFAULT_BETA","title":"DEFAULT_BETA = 0.9","text":""},{"location":"internals/#kebbie.scorer.WITH_TYPO","title":"WITH_TYPO = 'with_typo'","text":""},{"location":"internals/#kebbie.scorer.WITHOUT_TYPO","title":"WITHOUT_TYPO = 'without_typo'","text":""},{"location":"internals/#utilspy_1","title":"utils.py","text":""},{"location":"internals/#kebbie.utils.SEC_TO_NANOSEC","title":"SEC_TO_NANOSEC = 10000000000.0","text":""},{"location":"leaderboard/","title":"Leaderboard","text":"Keyboard Score Next-word prediction Auto-completion Auto-correction Gboard 0.54 0.33 0.79 0.82 iOS keyboard 0.46 0.43 0.8 0.6 Fleksy 0.43 0.27 0.73 0.6 KeyboardKit Pro 0.31 0 0.4 0.58 KeyboardKit Open-source 0 0 0 0.01

Info

The metrics used in this leaderboard are:

  • For next-word prediction : top-3 accuracy
  • For auto-completion : top-3 accuracy
  • For auto-correction : F-score

See Understanding the metrics for more details.

The overall score is a weighted sum of each task's score.

"},{"location":"public_api/","title":"Public API","text":""},{"location":"public_api/#classes","title":"Classes","text":""},{"location":"public_api/#kebbie.correctors.Corrector","title":"Corrector","text":"

Base class for Corrector, which is the component being tested.

Child classes should overwrite auto_correct(), auto_complete(), resolve_swipe(), and predict_next_word().

By default, the implementation for these methods is dummy: just return an empty list of candidates.

Source code in kebbie/correctors.py
class Corrector:\n    \"\"\"Base class for Corrector, which is the component being tested.\n\n    Child classes should overwrite `auto_correct()`, `auto_complete()`,\n    `resolve_swipe()`, and `predict_next_word()`.\n\n    By default, the implementation for these methods is dummy : just return an\n    empty list of candidates.\n    \"\"\"\n\n    def auto_correct(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        word: str,\n    ) -> List[str]:\n        \"\"\"Method used for auto-correction.\n        Given a context and a typed word, this method should return a list of\n        possible candidates for correction.\n\n        Note that the typed word is given both as a plain string, and as a list\n        of keystrokes. The child class overwriting this method can use either\n        of them.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            word (str): Word being typed (corresponding to the keystrokes).\n\n        Returns:\n            The list of correction candidates.\n        \"\"\"\n        return []\n\n    def auto_complete(\n        self,\n        context: str,\n        keystrokes: List[Optional[Tuple[float, float]]],\n        partial_word: str,\n    ) -> List[str]:\n        \"\"\"Method used for auto-completion.\n        Given a context and a partially typed word, this method should return\n        a list of possible candidates for completion.\n\n        Note that the typed word is given both as a plain string, and as a list\n        of keystrokes. 
The child class overwriting this method can use either\n        of them.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n                (x and y coordinates) for each keystroke of the word being\n                typed.\n            partial_word (str): Partial word being typed (corresponding to the\n                keystrokes).\n\n        Returns:\n            The list of completion candidates.\n        \"\"\"\n        return []\n\n    def resolve_swipe(self, context: str, swipe_gesture: List[Tuple[float, float]]) -> List[str]:\n        \"\"\"Method used for resolving a swipe gesture. Given a context and a\n        swipe gesture, this method should return a list of possible candidates\n        corresponding to this swipe gesture.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n            swipe_gesture (List[Tuple[float, float]]): List of positions (x and\n                y coordinates) along the keyboard, representing the swipe\n                gesture.\n\n        Returns:\n            The list of swiped word candidates.\n        \"\"\"\n        return []\n\n    def predict_next_word(self, context: str) -> List[str]:\n        \"\"\"Method used for next-word prediction task. 
Given a context, this\n        method should return a list of possible candidates for next-word.\n\n        Args:\n            context (str): String representing the previously typed characters\n                (the beginning of the sentence basically).\n\n        Returns:\n            The list of next-word candidates.\n        \"\"\"\n        return []\n\n    def profiled_auto_correct(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `auto_correct` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.auto_correct, *args, **kwargs)\n\n    def profiled_auto_complete(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `auto_complete` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.auto_complete, *args, **kwargs)\n\n    def profiled_resolve_swipe(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `resolve_swipe` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.resolve_swipe, *args, **kwargs)\n\n    def profiled_predict_next_word(self, 
*args, **kwargs) -> Tuple[List[str], int, int]:\n        \"\"\"Profiled (memory & runtime) version of `predict_next_word` method.\n\n        No need to overwrite this method, unless you want to specify a custom\n        memory and/or runtime measure.\n\n        Returns:\n            List of candidates returned from the profiled method.\n            Memory consumption in bytes.\n            Runtime in nano seconds.\n        \"\"\"\n        return profile_fn(self.predict_next_word, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.auto_correct","title":"auto_correct(context, keystrokes, word)","text":"

Method used for auto-correction. Given a context and a typed word, this method should return a list of possible candidates for correction.

Note that the typed word is given both as a plain string, and as a list of keystrokes. The child class overwriting this method can use either of them.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required word str

Word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of correction candidates.

Source code in kebbie/correctors.py
def auto_correct(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    word: str,\n) -> List[str]:\n    \"\"\"Method used for auto-correction.\n    Given a context and a typed word, this method should return a list of\n    possible candidates for correction.\n\n    Note that the typed word is given both as a plain string, and as a list\n    of keystrokes. The child class overwriting this method can use either\n    of them.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        word (str): Word being typed (corresponding to the keystrokes).\n\n    Returns:\n        The list of correction candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.auto_complete","title":"auto_complete(context, keystrokes, partial_word)","text":"

Method used for auto-completion. Given a context and a partially typed word, this method should return a list of possible candidates for completion.

Note that the typed word is given both as a plain string, and as a list of keystrokes. The child class overwriting this method can use either of them.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required keystrokes List[Optional[Tuple[float, float]]]

List of positions (x and y coordinates) for each keystroke of the word being typed.

required partial_word str

Partial word being typed (corresponding to the keystrokes).

required

Returns:

Type Description List[str]

The list of completion candidates.

Source code in kebbie/correctors.py
def auto_complete(\n    self,\n    context: str,\n    keystrokes: List[Optional[Tuple[float, float]]],\n    partial_word: str,\n) -> List[str]:\n    \"\"\"Method used for auto-completion.\n    Given a context and a partially typed word, this method should return\n    a list of possible candidates for completion.\n\n    Note that the typed word is given both as a plain string, and as a list\n    of keystrokes. The child class overwriting this method can use either\n    of them.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        keystrokes (List[Optional[Tuple[float, float]]]): List of positions\n            (x and y coordinates) for each keystroke of the word being\n            typed.\n        partial_word (str): Partial word being typed (corresponding to the\n            keystrokes).\n\n    Returns:\n        The list of completion candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.resolve_swipe","title":"resolve_swipe(context, swipe_gesture)","text":"

Method used for resolving a swipe gesture. Given a context and a swipe gesture, this method should return a list of possible candidates corresponding to this swipe gesture.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required swipe_gesture List[Tuple[float, float]]

List of positions (x and y coordinates) along the keyboard, representing the swipe gesture.

required

Returns:

Type Description List[str]

The list of swiped word candidates.

Source code in kebbie/correctors.py
def resolve_swipe(self, context: str, swipe_gesture: List[Tuple[float, float]]) -> List[str]:\n    \"\"\"Method used for resolving a swipe gesture. Given a context and a\n    swipe gesture, this method should return a list of possible candidates\n    corresponding to this swipe gesture.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n        swipe_gesture (List[Tuple[float, float]]): List of positions (x and\n            y coordinates) along the keyboard, representing the swipe\n            gesture.\n\n    Returns:\n        The list of swiped word candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.predict_next_word","title":"predict_next_word(context)","text":"

Method used for next-word prediction task. Given a context, this method should return a list of possible candidates for next-word.

Parameters:

Name Type Description Default context str

String representing the previously typed characters (the beginning of the sentence basically).

required

Returns:

Type Description List[str]

The list of next-word candidates.

Source code in kebbie/correctors.py
def predict_next_word(self, context: str) -> List[str]:\n    \"\"\"Method used for next-word prediction task. Given a context, this\n    method should return a list of possible candidates for next-word.\n\n    Args:\n        context (str): String representing the previously typed characters\n            (the beginning of the sentence basically).\n\n    Returns:\n        The list of next-word candidates.\n    \"\"\"\n    return []\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_auto_correct","title":"profiled_auto_correct(*args, **kwargs)","text":"

Profiled (memory & runtime) version of auto_correct method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_auto_correct(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `auto_correct` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.auto_correct, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_auto_complete","title":"profiled_auto_complete(*args, **kwargs)","text":"

Profiled (memory & runtime) version of auto_complete method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_auto_complete(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `auto_complete` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.auto_complete, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_resolve_swipe","title":"profiled_resolve_swipe(*args, **kwargs)","text":"

Profiled (memory & runtime) version of resolve_swipe method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_resolve_swipe(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `resolve_swipe` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.resolve_swipe, *args, **kwargs)\n
"},{"location":"public_api/#kebbie.correctors.Corrector.profiled_predict_next_word","title":"profiled_predict_next_word(*args, **kwargs)","text":"

Profiled (memory & runtime) version of predict_next_word method.

No need to overwrite this method, unless you want to specify a custom memory and/or runtime measure.

Returns:

Type Description List[str]

List of candidates returned from the profiled method.

int

Memory consumption in bytes.

int

Runtime in nano seconds.

Source code in kebbie/correctors.py
def profiled_predict_next_word(self, *args, **kwargs) -> Tuple[List[str], int, int]:\n    \"\"\"Profiled (memory & runtime) version of `predict_next_word` method.\n\n    No need to overwrite this method, unless you want to specify a custom\n    memory and/or runtime measure.\n\n    Returns:\n        List of candidates returned from the profiled method.\n        Memory consumption in bytes.\n        Runtime in nano seconds.\n    \"\"\"\n    return profile_fn(self.predict_next_word, *args, **kwargs)\n
"},{"location":"public_api/#functions","title":"Functions","text":""},{"location":"public_api/#kebbie.evaluate","title":"evaluate(corrector, lang='en-US', custom_keyboard=None, dataset=None, track_mistakes=False, n_most_common_mistakes=N_MOST_COMMON_MISTAKES, n_proc=None, seed=DEFAULT_SEED, beta=DEFAULT_BETA)","text":"

Main function of the kebbie framework, it evaluates the given Corrector.

Parameters:

Name Type Description Default corrector Corrector

The corrector to evaluate.

required lang str

Language to test. For now, only en-US is supported.

'en-US' custom_keyboard Dict

If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.

None dataset Dict[str, List[str]]

Data to use for testing. It should be a dictionary where the key is the name of the domain, and the value is a list of sentences. If None is given, it will use the SODA dataset.

None track_mistakes bool

If True, we will track the most common mistakes of the Corrector (these will be saved as TSV files in the working directory).

False n_most_common_mistakes int

If track_mistakes is set to True, the top X mistakes to record.

N_MOST_COMMON_MISTAKES n_proc int

Number of processes to use. If None, os.cpu_count() is used.

None seed int

Seed to use for running the tests.

DEFAULT_SEED beta float

Beta to use for computing the F-beta score.

DEFAULT_BETA

Raises:

Type Description UnsupportedLanguage

Exception raised if lang is set to a language that is not supported yet.

Returns:

Type Description Dict

The results, in a dictionary.

Source code in kebbie/__init__.py
def evaluate(\n    corrector: Corrector,\n    lang: str = \"en-US\",\n    custom_keyboard: Dict = None,\n    dataset: Dict[str, List[str]] = None,\n    track_mistakes: bool = False,\n    n_most_common_mistakes: int = N_MOST_COMMON_MISTAKES,\n    n_proc: Optional[int] = None,\n    seed: int = DEFAULT_SEED,\n    beta: float = DEFAULT_BETA,\n) -> Dict:\n    \"\"\"Main function of the `kebbie` framework, it evaluates the given\n    Corrector.\n\n    Args:\n        corrector (Corrector): The corrector to evaluate.\n        lang (str, optional): Language to test. For now, only `en-US` is\n            supported.\n        custom_keyboard (Dict, optional): If provided, instead of relying on\n            the keyboard layout provided by default, uses the given keyboard\n            layout.\n        dataset (Dict[str, List[str]], optional): Data to use for testing. It\n            should be a dictionary where the key is the name of the domain, and\n            the value is a list of sentences. If `None` is given, it will use\n            the SODA dataset.\n        track_mistakes (bool, optional): If `True`, we will track the most\n            common mistakes of the Corrector (these will be saved as TSV files\n            in the working directory).\n        n_most_common_mistakes (int, optional): If `track_mistakes` is set to\n            `True`, the top X mistakes to record.\n        n_proc (int, optional): Number of processes to use. If `None`,\n            `os.cpu_count()` is used.\n        seed (int): Seed to use for running the tests.\n        beta (float, optional): Beta to use for computing the F-beta score.\n\n    Raises:\n        UnsupportedLanguage: Exception raised if `lang` is set to a language\n            that is not supported yet.\n\n    Returns:\n        The results, in a dictionary.\n    \"\"\"\n    if lang not in SUPPORTED_LANG and custom_keyboard is None:\n        raise UnsupportedLanguage(f\"{lang} is not supported yet. 
List of supported languages : {SUPPORTED_LANG}\")\n\n    if dataset is None:\n        dataset = get_soda_dataset()\n\n    # Create the Oracle, the class used to create test cases and evaluate the scores\n    oracle = Oracle(\n        lang,\n        dataset,\n        custom_keyboard=custom_keyboard,\n        track_mistakes=track_mistakes,\n        n_most_common_mistakes=n_most_common_mistakes,\n        beta=beta,\n    )\n\n    # Run the tests & get the results\n    results = oracle.test(corrector, n_proc=n_proc, seed=seed)\n    return results\n
"},{"location":"public_api/#exceptions","title":"Exceptions","text":""},{"location":"public_api/#kebbie.UnsupportedLanguage","title":"UnsupportedLanguage","text":"

Bases: Exception

Custom Exception when the required language is not supported.

Source code in kebbie/__init__.py
class UnsupportedLanguage(Exception):\n    \"\"\"Custom Exception when the required language is not supported.\"\"\"\n\n    pass\n
"},{"location":"usage/","title":"Usage","text":"

kebbie exposes a class Corrector and a function evaluate().

The user creates a custom class which inherits from Corrector, overwrites methods such as auto_correct(), auto_complete(), predict_next_word(), and resolve_swipe(). Then the user calls evaluate() with the custom Corrector, which will run the benchmark and return the results as a Dictionary (it contains various metrics for each task).

Let's see how to do that in detail with a basic example : we will use pyspellchecker, a pure-Python spell-checking library, and test it using kebbie to see how well it performs.

"},{"location":"usage/#creating-your-own-corrector","title":"Creating your own Corrector","text":"

First, we define a subclass of Corrector, and we implement the constructor.

In our case, the constructor will simply initialize the pyspellchecker library :

from spellchecker import SpellChecker\nfrom kebbie import Corrector\n\n\nclass ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n

For this example we are only interested in auto-correction (spell-checking). So we need to over-write the auto_correct() method.

The implementation is straightforward thanks to pyspellchecker :

from typing import List\n\nfrom spellchecker import SpellChecker\nfrom kebbie import Corrector\n\n\nclass ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n\n    def auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n        cands = self.spellchecker.candidates(word)\n        return list(cands) if cands is not None else []\n

Great ! We have a testable Corrector class.

Info

We didn't overwrite the methods for the other tasks, and that's fine ! Other tasks' score will be set to 0, but we are just interested in auto-correction score anyway.

"},{"location":"usage/#calling-the-evaluate-function","title":"Calling the evaluate() function","text":"

Once we have the Corrector implemented, we can simply instantiate it and call the evaluate() function :

import json\nfrom typing import List\n\nfrom spellchecker import SpellChecker\nfrom kebbie import Corrector, evaluate\n\n\nclass ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n\n    def auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n        cands = self.spellchecker.candidates(word)\n        return list(cands) if cands is not None else []\n\n\nif __name__ == \"__main__\":\n    corrector = ExampleCorrector()\n    results = evaluate(corrector)\n\n    # Save the results in a local file for later inspection\n    with open(\"results.json\", \"w\") as f:\n        json.dump(results, f, ensure_ascii=False, indent=4)\n

And that's it !

Now you can just run your script. It might take some time to go over the 2 000 sentences of the test set, but eventually it will end and you should see a file results.json in your working directory.

"},{"location":"usage/#inspecting-the-results","title":"Inspecting the results","text":"

Go ahead and open the file results.json.

It contains the results of the test, with various metrics.

Results for pyspellchecker==0.8.1 at the time of writing
{\n    \"next_word_prediction\": {\n        \"score\": {\n            \"accuracy\": 0,\n            \"top3_accuracy\": 0,\n            \"n\": 46978\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 32044\n            },\n            \"dialogue\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 14934\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"865.0 KB\",\n            \"min_memory\": \"8.24 KB\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"5.91 \u03bcs\",\n            \"fastest_runtime\": \"0 ns\",\n            \"slowest_runtime\": \"2.13 ms\"\n        }\n    },\n    \"auto_completion\": {\n        \"score\": {\n            \"accuracy\": 0,\n            \"top3_accuracy\": 0,\n            \"n\": 46910\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 32002\n            },\n            \"dialogue\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 14908\n            }\n        },\n        \"per_completion_rate\": {\n            \"<25%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 1335\n            },\n            \"25%~50%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 8891\n            },\n            \"50%~75%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 25757\n            },\n            \">75%\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 10927\n            }\n        },\n        \"per_other\": {\n            \"without_typo\": {\n                
\"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 43450\n            },\n            \"with_typo\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 3460\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"865.0 KB\",\n            \"min_memory\": \"424 B\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"9.57 \u03bcs\",\n            \"fastest_runtime\": \"0 ns\",\n            \"slowest_runtime\": \"89.8 ms\"\n        }\n    },\n    \"auto_correction\": {\n        \"score\": {\n            \"accuracy\": 0.87,\n            \"precision\": 0.47,\n            \"recall\": 0.35,\n            \"fscore\": 0.41,\n            \"top3_accuracy\": 0.88,\n            \"top3_precision\": 0.56,\n            \"top3_recall\": 0.5,\n            \"top3_fscore\": 0.53,\n            \"n_typo\": 6302,\n            \"n\": 48864\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0.87,\n                \"precision\": 0.48,\n                \"recall\": 0.36,\n                \"fscore\": 0.42,\n                \"top3_accuracy\": 0.89,\n                \"top3_precision\": 0.57,\n                \"top3_recall\": 0.51,\n                \"top3_fscore\": 0.54,\n                \"n_typo\": 4247,\n                \"n\": 32948\n            },\n            \"dialogue\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.44,\n                \"recall\": 0.34,\n                \"fscore\": 0.39,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.53,\n                \"top3_recall\": 0.48,\n                \"top3_fscore\": 0.51,\n                \"n_typo\": 2055,\n                \"n\": 15916\n            }\n        },\n        \"per_typo_type\": {\n            \"DELETE_SPELLING_SYMBOL\": {\n                \"accuracy\": 0.83,\n                \"precision\": 0.15,\n   
             \"recall\": 0.07,\n                \"fscore\": 0.099,\n                \"top3_accuracy\": 0.84,\n                \"top3_precision\": 0.26,\n                \"top3_recall\": 0.14,\n                \"top3_fscore\": 0.19,\n                \"n_typo\": 129,\n                \"n\": 1000\n            },\n            \"DELETE_SPACE\": {\n                \"accuracy\": 0.83,\n                \"precision\": 0.11,\n                \"recall\": 0.051,\n                \"fscore\": 0.074,\n                \"top3_accuracy\": 0.83,\n                \"top3_precision\": 0.11,\n                \"top3_recall\": 0.051,\n                \"top3_fscore\": 0.074,\n                \"n_typo\": 137,\n                \"n\": 1062\n            },\n            \"DELETE_PUNCTUATION\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"DELETE_CHAR\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.42,\n                \"recall\": 0.29,\n                \"fscore\": 0.35,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.55,\n                \"top3_recall\": 0.48,\n                \"top3_fscore\": 0.52,\n                \"n_typo\": 559,\n                \"n\": 4334\n            },\n            \"ADD_SPELLING_SYMBOL\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"ADD_SPACE\": {\n            
    \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"ADD_PUNCTUATION\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            \"ADD_CHAR\": {\n                \"accuracy\": 0.9,\n                \"precision\": 0.6,\n                \"recall\": 0.59,\n                \"fscore\": 0.59,\n                \"top3_accuracy\": 0.92,\n                \"top3_precision\": 0.66,\n                \"top3_recall\": 0.76,\n                \"top3_fscore\": 0.7,\n                \"n_typo\": 855,\n                \"n\": 6629\n            },\n            \"SUBSTITUTE_CHAR\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.47,\n                \"recall\": 0.35,\n                \"fscore\": 0.4,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.55,\n                \"top3_recall\": 0.49,\n                \"top3_fscore\": 0.53,\n                \"n_typo\": 863,\n                \"n\": 6691\n            },\n            \"SIMPLIFY_ACCENT\": {\n                \"accuracy\": 0,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 0,\n                \"n\": 0\n            },\n            
\"SIMPLIFY_CASE\": {\n                \"accuracy\": 0.82,\n                \"precision\": 0,\n                \"recall\": 0,\n                \"fscore\": 0,\n                \"top3_accuracy\": 0.82,\n                \"top3_precision\": 0,\n                \"top3_recall\": 0,\n                \"top3_fscore\": 0,\n                \"n_typo\": 403,\n                \"n\": 3125\n            },\n            \"TRANSPOSE_CHAR\": {\n                \"accuracy\": 0.89,\n                \"precision\": 0.58,\n                \"recall\": 0.54,\n                \"fscore\": 0.56,\n                \"top3_accuracy\": 0.91,\n                \"top3_precision\": 0.64,\n                \"top3_recall\": 0.7,\n                \"top3_fscore\": 0.66,\n                \"n_typo\": 1313,\n                \"n\": 10181\n            },\n            \"COMMON_TYPO\": {\n                \"accuracy\": 0.85,\n                \"precision\": 0.39,\n                \"recall\": 0.26,\n                \"fscore\": 0.32,\n                \"top3_accuracy\": 0.88,\n                \"top3_precision\": 0.53,\n                \"top3_recall\": 0.45,\n                \"top3_fscore\": 0.49,\n                \"n_typo\": 1725,\n                \"n\": 13375\n            }\n        },\n        \"per_number_of_typos\": {\n            \"1\": {\n                \"accuracy\": 0.87,\n                \"precision\": 0.47,\n                \"recall\": 0.36,\n                \"fscore\": 0.41,\n                \"top3_accuracy\": 0.89,\n                \"top3_precision\": 0.56,\n                \"top3_recall\": 0.51,\n                \"top3_fscore\": 0.54,\n                \"n_typo\": 5984,\n                \"n\": 46397\n            },\n            \"2\": {\n                \"accuracy\": 0.86,\n                \"precision\": 0.43,\n                \"recall\": 0.29,\n                \"fscore\": 0.35,\n                \"top3_accuracy\": 0.87,\n                \"top3_precision\": 0.47,\n                \"top3_recall\": 0.36,\n       
         \"top3_fscore\": 0.41,\n                \"n_typo\": 292,\n                \"n\": 2264\n            },\n            \"3+\": {\n                \"accuracy\": 0.83,\n                \"precision\": 0.17,\n                \"recall\": 0.077,\n                \"fscore\": 0.11,\n                \"top3_accuracy\": 0.84,\n                \"top3_precision\": 0.23,\n                \"top3_recall\": 0.12,\n                \"top3_fscore\": 0.16,\n                \"n_typo\": 26,\n                \"n\": 202\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"866.0 KB\",\n            \"min_memory\": \"7.05 KB\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"358.0 ms\",\n            \"fastest_runtime\": \"69.1 \u03bcs\",\n            \"slowest_runtime\": \"77.1 s\"\n        }\n    },\n    \"swipe_resolution\": {\n        \"score\": {\n            \"accuracy\": 0,\n            \"top3_accuracy\": 0,\n            \"n\": 417\n        },\n        \"per_domain\": {\n            \"narrative\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 313\n            },\n            \"dialogue\": {\n                \"accuracy\": 0,\n                \"top3_accuracy\": 0,\n                \"n\": 104\n            }\n        },\n        \"performances\": {\n            \"mean_memory\": \"860.0 KB\",\n            \"min_memory\": \"96.0 KB\",\n            \"max_memory\": \"1.1 MB\",\n            \"mean_runtime\": \"24.5 \u03bcs\",\n            \"fastest_runtime\": \"0 ns\",\n            \"slowest_runtime\": \"4.68 ms\"\n        }\n    },\n    \"overall_score\": 0.164\n}\n

Let's go over the content quickly.

First, the metrics are divided into each tasks :

  • next_word_prediction
  • auto_completion
  • auto_correction
  • swipe_resolution

Info

At the end of the file, there is also a field overall_score. This is just an aggregation of the scores of all tasks, to have an easy way to compare runs.

As expected, if you look at other tasks than auto_correction, their score is zero. That's expected, because we are interested only on auto-correction, and we didn't implement the code for the other tasks.

Let's take a deeper look at the auto_correction results.

First, we have a score field, which contains various overall metrics about the auto-correction capability : precision, recall, F-score, etc...

There is also a value n, which shows the total number of words we tried to auto-correct, and n_typo, the number of words which contained a typo.

For auto-correction, the metric we care about is the F-score, as it measures both the precision and the recall.

Info

For more information about the metrics and their meaning, check out the Metrics page.

Then we have a per_domain field, which also contains the same metrics, but divided into the various domains of our dataset. We can see that pyspellchecker is better at correcting narrative data than dialogue data, since the F-score is higher.

We then have a per_typo_type field, which shows the metrics for each type of typo introduced. Note that evaluate() does not introduce all types of typos by default, so some of them are set to 0.

After we have a per_number_of_typos field, which gives the metrics depending on how many typos were introduced in that word.

And finally we have a field performances, which shows the memory consumption and runtime for the auto_correct() method that we wrote.

"},{"location":"usage/#a-note-about-multiprocessing","title":"A note about multiprocessing","text":"

Under the hood, evaluate() uses multiprocessing to run faster.

It means that your Corrector should be pickable !

Example

In the example above, the implementation provided is already pickable, so there is nothing to do.

If you need to make your class pickable, just implement the __reduce__() magic method, like this :

from typing import Tuple\n\nfrom kebbie import Corrector\n\n\nclass GreatCorrector(Corrector):\n    def __init__(self, model_path: str):\n        self.m_path = model_path\n\n        # Because of this (imaginary) non-pickable attribute,\n        # the class `GreatCorrector` is not pickable as-is\n        self.non_pickable_model = load_model(model_path)\n\n    def __reduce__(self) -> Tuple:\n        # But by implementing `__reduce__()`, we can make it pickable !\n        return (GreatCorrector, (self.m_path,))\n
"},{"location":"usage/#advanced-usage","title":"Advanced usage","text":""},{"location":"usage/#leveraging-the-keystroke-coordinates","title":"Leveraging the keystroke coordinates","text":"

Did you notice that in our auto_correct() implementation, there is an argument keystrokes that we didn't use ?

class ExampleCorrector(Corrector):\n    def __init__(self):\n        self.spellchecker = SpellChecker()\n\n    def auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n        cands = self.spellchecker.candidates(word)\n        return list(cands) if cands is not None else []\n

This keystrokes argument is a list of keystrokes coordinates (one per character of the typed word).

These coordinates may hold useful information : for example on a QWERTY keyboard, if the word typed is lovw but the keystroke for w is very close to the border of the e key... There is a great chance that the word should be auto-corrected to love...

These coordinates are defined in a layout file internally. To interact easily with the layout, you can use the LayoutHelper class.

You can use the method get_key_info() to retrieve data about the key for the given character.

For example, let's compute the distance between the first keystroke of the word, and the key for the character w :

import math\nfrom kebbie.layout import LayoutHelper\n\nlayout = LayoutHelper()\n\ndef auto_correct(self, context: str, keystrokes, word: str) -> List[str]:\n    _, _, w_key_center_x, w_key_center_y, _ = layout.get_key_info(\"w\")\n    if len(keystrokes) > 0 and keystrokes[0] is not None:\n        print(math.dist(keystrokes[0], [w_key_center_x, w_key_center_y]))\n
"},{"location":"usage/#custom-dataset","title":"Custom dataset","text":"

The evaluate() function uses a good default dataset (see Test data) to run the evaluation.

However, you might want to run the evaluation on your own dataset.

You can do this by passing your custom dataset to the evaluate() function :

my_dataset = load_my_private_dataset()\ncorrector = ExampleCorrector()\nresults = evaluate(corrector, dataset=my_dataset)\n

Your custom dataset should be a Dict[str, List[str]], where each keys of the dictionary represents a specific domain, and the values are just the list of sentences.

"},{"location":"usage/#get-insights-on-most-common-mistakes","title":"Get insights on most common mistakes","text":"

When trying to improve your models, you might want to take a look at the most common mistakes your model is doing.

You can achieve this simply by passing track_mistakes=True to the evaluate() function :

corrector = ExampleCorrector()\nresults = evaluate(corrector, track_mistakes=True)\n

It will record the most common mistakes your Corrector is doing, and add them in a new field (most_common_mistakes) in the returned results.

The mistakes are tracked for the following tasks : next-word prediction, auto-completion, and auto-correction.

Let's look at the most common mistakes for our example with pyspellchecker :

\"auto_correction\": [\n    [\n        \"Count\",\n        \"Expected\",\n        \"Predictions\",\n        \"Context\"\n    ],\n    [\n        266,\n        \"I'm\",\n        \"[ism, h'm]\",\n        \"Kolten beckoned Aida over wanting to hear what he had to say Aida I want to know what's on your mind Kolten said I'm\"\n    ],\n    [\n        157,\n        \"to\",\n        \"[tho]\",\n        \"Destanie was so angry that he felt like he might explode He felt the hot blood rushing to his head and his fists clenched tightly at his sides He took a deep breath and tried tho\"\n    ],\n    ...\n

Here we can see that we track several things for each mistake :

  • Count : The total number of times this mistake happened
  • Expected : The expected word
  • Predictions : The model's predictions
  • Context : An example of a sentence where the mistake happened

So we can see that the most common mistake of pyspellchecker is to try to auto-correct I'm into ism, even though it should not be corrected. This mistake was encountered 266 times during the evaluation.

The second most common mistake is to not auto-correct tho, even though it should be corrected to to. This mistake was encountered 157 times during the evaluation.

Tip

By default, the 1 000 most common mistakes will be saved. You can specify a different n, with the n_most_common_mistakes argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, track_mistakes=True, n_most_common_mistakes=150)\n
"},{"location":"usage/#other-arguments","title":"Other arguments","text":"

Specify the number of processes to be used for multiprocessing with the n_proc argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, n_proc=4)\n

Note

If None is given, evaluate() will use os.cpu_count() (the number of CPU of your machine). Defaults to None.

Specify a different seed with the seed argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, seed=36)\n

Specify a different Beta for the F-score calculation (see the Metrics section) with the beta argument :

corrector = ExampleCorrector()\nresults = evaluate(corrector, beta=1.2)\n
"}]} \ No newline at end of file diff --git a/master/sitemap.xml.gz b/master/sitemap.xml.gz index 0a68f77..cfbca6b 100644 Binary files a/master/sitemap.xml.gz and b/master/sitemap.xml.gz differ