Skip to content

Commit

Permalink
#68 documenting annotate_data classes and functions
Browse files Browse the repository at this point in the history
  • Loading branch information
alvesisaque committed Feb 7, 2023
1 parent 42b153f commit 4d23f18
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions hygia/data_pipeline/annotate_data/annotate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,43 @@
from colorama import Fore, Style

class AnnotateData:
"""
A class that implements the data annotation phase: based on the given thresholds
(e.g., count sequence squared vowels, count sequence squared consonants), it can tell whether a value is a key smash (ksmash).
Examples
--------
Use this class like this:
.. code-block:: python
annotate_data = hg.AnnotateData()
key_smash_thresholds = {
'count_sequence_squared_vowels': 1.00,
'count_sequence_squared_consonants': 1.999,
'count_sequence_squared_special_characters': 2.2499,
'ratio_of_numeric_digits_squared': 2.9,
'average_of_char_count_squared': 2.78,
}
df = annotate_data.annotate_data(df, concatened_column_name, key_smash_thresholds)
print(df)
"""
def annotate_data(self, df, concatened_column_name, ks_thresholds):
"""
Annotate data function.
:param df: Dataframe to extract features from.
:type df: pandas.DataFrame
:param concatened_column_name: Dataframe column to be used
:type concatened_column_name: List
:param ks_thresholds: Key smash thresholds, keyed by feature name (e.g., ``'count_sequence_squared_vowels': 1.00``)
:type ks_thresholds: dict
:return: The input dataframe with additional columns for key smashing and word embedding features.
:rtype: pandas.DataFrame
"""

print(f'{Fore.YELLOW}running annotate data with configs below...{Fore.WHITE}')

print(f'{Style.BRIGHT}thresholds -> {Style.NORMAL}{ks_thresholds}')
Expand Down

0 comments on commit 4d23f18

Please sign in to comment.