diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index f001c54..0df06b1 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -20,23 +20,35 @@ def __init__(self, lang1: str, lang2: str) -> None: lang1 (str) lang2 (str) """ - self.translation_cmds: Dict[Tuple[str, str], List[List[str]]] = {} + self.translation_cmds: Dict[Tuple[str, str, bool, bool], List[List[str]]] = {} self.lang1 = lang1 self.lang2 = lang2 - def _get_commands(self, lang1: str, lang2: str) -> List[List[str]]: + def _get_commands(self, lang1: str, lang2: str, mark_unknown: bool = True, + display_ambiguity: bool = False) -> List[List[str]]: """ Args: lang1 (str) lang2 (str) + mark_unknown (bool) + display_ambiguity (bool) Returns: List[List[str]] """ - if (lang1, lang2) not in self.translation_cmds: + key = (lang1, lang2, mark_unknown, display_ambiguity) + + if key not in self.translation_cmds: mode_path = apertium.pairs['%s-%s' % (lang1, lang2)] - self.translation_cmds[(lang1, lang2)] = parse_mode_file(mode_path) - return self.translation_cmds[(lang1, lang2)] + # Map the flags onto the options substituted for $1/$2 in the mode file + option = None + option_tagger = None + if not mark_unknown: + option = ['-n'] + if display_ambiguity: + option_tagger = ['-m'] + self.translation_cmds[key] = parse_mode_file(mode_path, option, option_tagger) + return self.translation_cmds[key] def _get_format(self, formatting: Optional[str], deformat: Optional[str], reformat: Optional[str]) -> Tuple[Optional[str], Optional[str]]: """ @@ -146,7 +158,8 @@ def _get_reformat(self, reformat: str, text: str) -> bytes: result = re.sub(rb'\0$', b'', text) # type: ignore return result - def translate(self, text: str, mark_unknown: bool = False, formatting: Optional[str] = None, deformat: str = 'txt', reformat: str = 'txt') -> str: + def translate(self, text: str, mark_unknown: bool = True, display_ambiguity: bool = False, + formatting: Optional[str] = None, deformat: str = 'txt', reformat: str = 
'txt') -> str: """ Args: text (str) @@ -165,7 +178,7 @@ def translate(self, text: str, mark_unknown: bool = False, formatting: Optional[ if pair is not None: lang1, lang2 = pair - cmds = list(self._get_commands(lang1, lang2)) + cmds = list(self._get_commands(lang1, lang2, mark_unknown, display_ambiguity)) unsafe_deformat, unsafe_reformat = self._get_format(formatting, deformat, reformat) deformater, reformater = self._validate_formatters(unsafe_deformat, unsafe_reformat) deformatted = self._get_deformat(str(deformater), text) @@ -174,7 +187,7 @@ def translate(self, text: str, mark_unknown: bool = False, formatting: Optional[ return result.decode() -def translate(lang1: str, lang2: str, text: str, mark_unknown: bool = False, +def translate(lang1: str, lang2: str, text: str, mark_unknown: bool = True, display_ambiguity: bool = False, formatting: Optional[str] = None, deformat: str = 'txt', reformat: str = 'txt') -> str: """ Args: diff --git a/apertium/utils.py b/apertium/utils.py index 423e710..2105205 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -3,7 +3,7 @@ import subprocess import sys import tempfile -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union, Optional try: if platform.system() == 'Linux': @@ -176,7 +176,7 @@ def execute_pipeline(inp: str, commands: List[List[str]]) -> str: return end.decode() -def parse_mode_file(mode_path: str) -> List[List[str]]: +def parse_mode_file(mode_path: str, option: Optional[List[str]] = None, option_tagger: Optional[List[str]] = None) -> List[List[str]]: """ Args: mode_path (str) @@ -184,6 +184,12 @@ def parse_mode_file(mode_path: str) -> List[List[str]]: Returns: List[List[str]] """ + + if option is None: + option = ['-g'] + if option_tagger is None: + option_tagger = [] + with open(mode_path) as mode_file: mode_str = mode_file.read().strip() if mode_str: @@ -192,7 +198,8 @@ def parse_mode_file(mode_path: str) -> List[List[str]]: # TODO: we should make language pairs install # 
modes.xml instead; this is brittle (what if a path # has | or ' in it?) - cmd = cmd.replace('$2', '').replace('$1', '-g') + cmd = cmd.replace('$1', ' '.join(option)) + cmd = cmd.replace('$2', ' '.join(option_tagger)) commands.append([c.strip("'") for c in cmd.split()]) return commands else: