Add CHAT error-code detection to deregularise.py.

Adds the chat_errors table (deregularisation label -> CHAT error code),
map_error(), which returns the mapped code or the label itself when it is
not in the table, and detect_error(), which returns (1, code) when one of
the correctinflection() candidates for the original equals the correction
and (0, None) otherwise. cast and Optional are imported from typing for
the new code.

diff --git a/deregularise.py b/deregularise.py
index c967756..3ae1cf8 100644
--- a/deregularise.py
+++ b/deregularise.py
@@ -34,7 +34,7 @@
 import csv
 import re
 import os
-from typing import Dict, List, Tuple
+from typing import cast, Dict, List, Optional, Tuple
 from config import SD_DIR
 
 tab = '\t'
@@ -50,6 +50,14 @@ wrongen = 'Wrong -en suffix'
 nolabel = 'Correct'
 
 
+chat_errors = {
+    'Overgeneralisation': 'm',
+    'Lacking ge prefix': 'm',
+    'Prefix ge without onset': 'm',
+    'Wrong Overgeneralisation': 'm',
+    'Wrong -en suffix': 'm'
+}
+
 metaarr = {}
 metaarr['ge'] = ''
 metaarr[''] = noge
@@ -98,6 +106,34 @@ def correctinflection(word: str) -> List[Tuple[str, str]]:
     return result
 
 
+def map_error(error_type: str) -> str:
+    try:
+        return chat_errors[error_type]
+    except KeyError:
+        return error_type
+
+
+def detect_error(original: str, correction: str) -> Tuple[int, Optional[str]]:
+    """Detects an error comparing a text with a correction and returns
+    the desired editing distance and the CHAT error code
+
+    Args:
+        original (str): transcribed text
+        correction (str): correction
+
+    Returns:
+        Tuple[int, Optional[str]]: editing distance and error code
+    """
+    error = None
+    for candidate, candidate_error in correctinflection(original):
+        if candidate == correction:
+            error = map_error(candidate_error)
+    if error is not None:
+        return 1, cast(str, error)
+    else:
+        return 0, None
+
+
 def alt(thestr):
     result = '[' + thestr + ']'
     return result