From 35d2de605b8460fe611ebe084260ee382015cbcd Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 14 Oct 2024 23:11:21 +0300 Subject: [PATCH] add type annotations for regex --- pyformlang/regular_expression/python_regex.py | 27 ++-- pyformlang/regular_expression/regex.py | 117 +++++++++--------- .../regular_expression/regex_objects.py | 105 ++++++++-------- pyformlang/regular_expression/regex_reader.py | 78 ++++++------ 4 files changed, 168 insertions(+), 159 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index f936b0f..1e1b4ea 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -2,16 +2,17 @@ A class to read Python format regex """ -import re -import string -import unicodedata +from typing import Union +from re import compile as comp, Pattern +from string import printable +from unicodedata import lookup -# pylint: disable=cyclic-import -from pyformlang.regular_expression import regex, MisformedRegexError +from pyformlang.regular_expression import MisformedRegexError +from pyformlang.regular_expression.regex import Regex from pyformlang.regular_expression.regex_reader import \ WRONG_PARENTHESIS_MESSAGE -PRINTABLES = list(string.printable) +PRINTABLES = list(printable) TRANSFORMATIONS = { "|": "\\|", @@ -55,7 +56,7 @@ ESCAPED_OCTAL = ["\\0", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "\\7"] -class PythonRegex(regex.Regex): +class PythonRegex(Regex): """ Represents a regular expression as used in Python. It adds the following features to the basic regex: @@ -98,11 +99,11 @@ class PythonRegex(regex.Regex): """ - def __init__(self, python_regex): - if not isinstance(python_regex, str): - python_regex = python_regex.pattern + def __init__(self, python_regex: Union[str, Pattern[str]]) -> None: + if isinstance(python_regex, str): + comp(python_regex) # Check if it is valid else: - re.compile(python_regex) # Check if it is valid + python_regex = python_regex.pattern self._python_regex = python_regex self._replace_shortcuts() @@ -114,7 +115,7 @@ def __init__(self, python_regex): self._python_regex = self._python_regex.lstrip('\b') super().__init__(self._python_regex) - def _separate(self): + def _separate(self) -> None: regex_temp = [] for symbol in self._python_regex: if self._should_escape_next_symbol(regex_temp): @@ -193,7 +194,7 @@ def _recombine(regex_to_recombine): while regex_to_recombine[idx_end] != "}": idx_end += 1 name = "".join(regex_to_recombine[idx + 2: idx_end]) - name = unicodedata.lookup(name) + name = lookup(name) temp.append(TRANSFORMATIONS.get(name, name)) idx = idx_end + 1 elif regex_to_recombine[idx] == "\\u": diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 889e94d..842e946 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -1,16 +1,17 @@ """ Representation of a regular expression """ -from typing import Iterable - -from pyformlang import finite_automaton -# pylint: disable=cyclic-import -import pyformlang.regular_expression.regex_objects -from pyformlang import cfg -from pyformlang.finite_automaton import State -# pylint: disable=cyclic-import + +from typing import List, Iterable, Tuple, Any + +from pyformlang.finite_automaton import Epsilon as FAEpsilon +from pyformlang.finite_automaton import EpsilonNFA, State, Symbol +from pyformlang.cfg.cfg import CFG, Production +from pyformlang.cfg.utils import to_variable from pyformlang.regular_expression.regex_reader import RegexReader -from pyformlang import regular_expression +from pyformlang.regular_expression.python_regex import PythonRegex +from pyformlang.regular_expression.regex_objects import \ + Epsilon as RegexEpsilon, Empty, Concatenation, Union, KleeneStar class Regex(RegexReader): @@ -85,16 +86,11 @@ class Regex(RegexReader): """ - def __init__(self, regex): - self.head = None - self.sons = None + def __init__(self, regex: str) -> None: super().__init__(regex) + self.sons: List[Regex] = [] self._counter = 0 - self._initialize_enfa() - self._enfa = None - - def _initialize_enfa(self): - self._enfa = finite_automaton.EpsilonNFA() + self._enfa = EpsilonNFA() def get_number_symbols(self) -> int: """ Gives the number of symbols in the regex @@ -139,7 +135,7 @@ def get_number_operators(self) -> int: return 1 + sum(son.get_number_operators() for son in self.sons) return 0 - def to_epsilon_nfa(self): + def to_epsilon_nfa(self) -> EpsilonNFA: """ Transforms the regular expression into an epsilon NFA Returns @@ -154,28 +150,28 @@ def to_epsilon_nfa(self): >>> regex.to_epsilon_nfa() """ - self._initialize_enfa() + self._enfa = EpsilonNFA() s_initial = self._set_and_get_initial_state_in_enfa() s_final = self._set_and_get_final_state_in_enfa() self._process_to_enfa(s_initial, s_final) return self._enfa - def _set_and_get_final_state_in_enfa(self): + def _set_and_get_final_state_in_enfa(self) -> State: s_final = self._get_next_state_enfa() self._enfa.add_final_state(s_final) return s_final - def _get_next_state_enfa(self): - s_final = finite_automaton.State(self._counter) + def _get_next_state_enfa(self) -> State: + s_final = State(self._counter) self._counter += 1 return s_final - def _set_and_get_initial_state_in_enfa(self): + def _set_and_get_initial_state_in_enfa(self) -> State: s_initial = self._get_next_state_enfa() self._enfa.add_start_state(s_initial) return s_initial - def _process_to_enfa(self, s_from: State, s_to: State): + def _process_to_enfa(self, s_from: State, s_to: State) -> None: """ Internal function to add a regex to a given epsilon NFA Parameters @@ -190,29 +186,24 @@ def _process_to_enfa(self, s_from: State, s_to: State): else: self._process_to_enfa_when_no_son(s_from, s_to) - def _process_to_enfa_when_no_son(self, s_from, s_to): - if isinstance(self.head, - pyformlang.regular_expression.regex_objects.Epsilon): + def _process_to_enfa_when_no_son(self, s_from: State, s_to: State) -> None: + if isinstance(self.head, RegexEpsilon): self._add_epsilon_transition_in_enfa_between(s_from, s_to) - elif not isinstance(self.head, - pyformlang.regular_expression.regex_objects.Empty): - symbol = finite_automaton.Symbol(self.head.value) + elif not isinstance(self.head, Empty): + symbol = Symbol(self.head.value) self._enfa.add_transition(s_from, symbol, s_to) - def _process_to_enfa_when_sons(self, s_from, s_to): + def _process_to_enfa_when_sons(self, s_from: State, s_to: State) -> None: if isinstance( - self.head, - pyformlang.regular_expression.regex_objects.Concatenation): + self.head, Concatenation): self._process_to_enfa_concatenation(s_from, s_to) - elif isinstance(self.head, - pyformlang.regular_expression.regex_objects.Union): + elif isinstance(self.head, Union): self._process_to_enfa_union(s_from, s_to) elif isinstance( - self.head, - pyformlang.regular_expression.regex_objects.KleeneStar): + self.head, KleeneStar): self._process_to_enfa_kleene_star(s_from, s_to) - def _process_to_enfa_kleene_star(self, s_from, s_to): + def _process_to_enfa_kleene_star(self, s_from: State, s_to: State) -> None: # pylint: disable=protected-access state_first = self._get_next_state_enfa() state_second = self._get_next_state_enfa() @@ -222,30 +213,40 @@ def _process_to_enfa_kleene_star(self, s_from, s_to): self._add_epsilon_transition_in_enfa_between(state_second, s_to) self._process_to_enfa_son(state_first, state_second, 0) - def _process_to_enfa_union(self, s_from, s_to): + def _process_to_enfa_union(self, s_from: State, s_to: State) -> None: son_number = 0 self._create_union_branch_in_enfa(s_from, s_to, son_number) son_number = 1 self._create_union_branch_in_enfa(s_from, s_to, son_number) - def _create_union_branch_in_enfa(self, s_from, s_to, son_number): + def _create_union_branch_in_enfa(self, + s_from: State, + s_to: State, + son_number: int) -> None: state0 = self._get_next_state_enfa() state2 = self._get_next_state_enfa() self._add_epsilon_transition_in_enfa_between(s_from, state0) self._add_epsilon_transition_in_enfa_between(state2, s_to) self._process_to_enfa_son(state0, state2, son_number) - def _process_to_enfa_concatenation(self, s_from, s_to): + def _process_to_enfa_concatenation(self, + s_from: State, + s_to: State) -> None: state0 = self._get_next_state_enfa() state1 = self._get_next_state_enfa() self._add_epsilon_transition_in_enfa_between(state0, state1) self._process_to_enfa_son(s_from, state0, 0) self._process_to_enfa_son(state1, s_to, 1) - def _add_epsilon_transition_in_enfa_between(self, state0, state1): - self._enfa.add_transition(state0, finite_automaton.Epsilon(), state1) + def _add_epsilon_transition_in_enfa_between(self, + state0: State, + state1: State) -> None: + self._enfa.add_transition(state0, FAEpsilon(), state1) - def _process_to_enfa_son(self, s_from, s_to, index_son): + def _process_to_enfa_son(self, + s_from: State, + s_to: State, + index_son: int) -> None: # pylint: disable=protected-access self.sons[index_son]._counter = self._counter self.sons[index_son]._enfa = self._enfa @@ -280,7 +281,7 @@ def get_tree_str(self, depth: int = 0) -> str: temp += son.get_tree_str(depth + 1) return temp - def to_cfg(self, starting_symbol="S") -> "CFG": + def to_cfg(self, starting_symbol: str = "S") -> CFG: """ Turns the regex into a context-free grammar @@ -304,11 +305,12 @@ def to_cfg(self, starting_symbol="S") -> "CFG": """ productions, _ = self._get_production(starting_symbol) - cfg_res = cfg.CFG(start_symbol=cfg.utils.to_variable(starting_symbol), + cfg_res = CFG(start_symbol=to_variable(starting_symbol), productions=set(productions)) return cfg_res - def _get_production(self, current_symbol, count=0): + def _get_production(self, current_symbol: Any, count: int = 0) \ + -> Tuple[List[Production], int]: next_symbols = [] next_productions = [] for son in self.sons: @@ -322,7 +324,7 @@ def _get_production(self, current_symbol, count=0): next_productions += new_prods return next_productions, count - def __repr__(self): + def __repr__(self) -> str: return self.head.get_str_repr([str(son) for son in self.sons]) def union(self, other: "Regex") -> "Regex": @@ -357,11 +359,11 @@ def union(self, other: "Regex") -> "Regex": """ regex = Regex("") - regex.head = pyformlang.regular_expression.regex_objects.Union() + regex.head = Union() regex.sons = [self, other] return regex - def __or__(self, other): + def __or__(self, other: "Regex") -> "Regex": """ Makes the union with another regex Parameters @@ -427,12 +429,11 @@ def concatenate(self, other: "Regex") -> "Regex": True """ regex = Regex("") - regex.head = \ - pyformlang.regular_expression.regex_objects.Concatenation() + regex.head = Concatenation() regex.sons = [self, other] return regex - def __add__(self, other): + def __add__(self, other: "Regex") -> "Regex": """ Concatenates a regular expression with an other one Parameters @@ -485,11 +486,11 @@ def kleene_star(self) -> "Regex": """ regex = Regex("") - regex.head = pyformlang.regular_expression.regex_objects.KleeneStar() + regex.head = KleeneStar() regex.sons = [self] return regex - def from_string(self, regex_str: str): + def from_string(self, regex_str: str) -> "Regex": """ Construct a regex from a string. For internal usage. Equivalent to the constructor of Regex @@ -515,7 +516,7 @@ def from_string(self, regex_str: str): """ return Regex(regex_str) - def accepts(self, word: Iterable[str]) -> bool: + def accepts(self, word: Iterable[Any]) -> bool: """ Check if a word matches (completely) the regex @@ -545,7 +546,7 @@ def accepts(self, word: Iterable[str]) -> bool: return self._enfa.accepts(word) @classmethod - def from_python_regex(cls, regex): + def from_python_regex(cls, regex: str) -> PythonRegex: """ Creates a regex from a string using the python way to write it. @@ -570,4 +571,4 @@ def from_python_regex(cls, regex): >>> Regex.from_python_regex("a+[cd]") """ - return regular_expression.PythonRegex(regex) + return PythonRegex(regex) diff --git a/pyformlang/regular_expression/regex_objects.py b/pyformlang/regular_expression/regex_objects.py index 053f9b4..73c4913 100644 --- a/pyformlang/regular_expression/regex_objects.py +++ b/pyformlang/regular_expression/regex_objects.py @@ -1,7 +1,11 @@ """ Representation of some objects used in regex. """ -import pyformlang + +from typing import List, Iterable, Any + +from pyformlang.cfg import Production +from pyformlang.cfg.utils import to_variable, to_terminal class Node: # pylint: disable=too-few-public-methods @@ -13,11 +17,11 @@ class Node: # pylint: disable=too-few-public-methods The value of the node """ - def __init__(self, value): + def __init__(self, value: Any) -> None: self._value = value @property - def value(self): + def value(self) -> Any: """ Give the value of the node Returns @@ -27,7 +31,7 @@ def value(self): """ return self._value - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ The string representation of the node @@ -44,7 +48,8 @@ def get_str_repr(self, sons_repr): """ raise NotImplementedError - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" raise NotImplementedError @@ -91,14 +96,15 @@ class Operator(Node): # pylint: disable=too-few-public-methods The value of the operator """ - def __repr__(self): + def __repr__(self) -> str: return "Operator(" + str(self._value) + ")" - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ Get the string representation """ raise NotImplementedError - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" raise NotImplementedError @@ -113,17 +119,18 @@ class Symbol(Node): # pylint: disable=too-few-public-methods The value of the symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return str(self.value) - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_terminal(self.value)])] + return [Production( + to_variable(current_symbol), + [to_terminal(self.value)])] - def __repr__(self): + def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" @@ -131,15 +138,16 @@ class Concatenation(Operator): # pylint: disable=too-few-public-methods """ Represents a concatenation """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son) for son in sons])] + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: + return [Production( + to_variable(current_symbol), + [to_variable(son) for son in sons])] - def __init__(self): + def __init__(self) -> None: super().__init__("Concatenation") @@ -147,16 +155,16 @@ class Union(Operator): # pylint: disable=too-few-public-methods """ Represents a union """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + "|".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son)]) - for son in sons] + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: + return [Production( + to_variable(current_symbol), + [to_variable(son)]) for son in sons] - def __init__(self): + def __init__(self) -> None: super().__init__("Union") @@ -164,24 +172,23 @@ class KleeneStar(Operator): # pylint: disable=too-few-public-methods """ Represents an epsilon symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")*" - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: return [ - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - []), - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(current_symbol), - pyformlang.cfg.utils.to_variable(current_symbol)]), - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son) for son in sons]) + Production( + to_variable(current_symbol), []), + Production( + to_variable(current_symbol), + [to_variable(current_symbol), to_variable(current_symbol)]), + Production( + to_variable(current_symbol), + [to_variable(son) for son in sons]) ] - def __init__(self): + def __init__(self) -> None: super().__init__("Kleene Star") @@ -189,15 +196,14 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods """ Represents an epsilon symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "$" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [])] + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[str]) \ + -> List[Production]: + return [Production(to_variable(current_symbol), [])] - def __init__(self): + def __init__(self) -> None: super().__init__("Epsilon") @@ -205,16 +211,17 @@ class Empty(Symbol): # pylint: disable=too-few-public-methods """ Represents an empty symbol """ - def __init__(self): + def __init__(self) -> None: super().__init__("Empty") - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: return [] class MisformedRegexError(Exception): """ Error for misformed regex """ - def __init__(self, message: str, regex: str): + def __init__(self, message: str, regex: str) -> None: super().__init__(message + " Regex: " + regex) self._regex = regex diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index d65e2c3..520fba9 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -2,9 +2,11 @@ A class to read regex """ -import re +from typing import List, Optional, Any +from re import sub -from pyformlang.regular_expression.regex_objects import to_node, Operator, \ +from pyformlang.regular_expression.regex_objects import \ + to_node, Node, Operator, \ Symbol, Concatenation, Union, \ KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS @@ -19,40 +21,39 @@ class RegexReader: """ # pylint: disable=too-few-public-methods - def __init__(self, regex: str): - self._current_node = None - self.head = None - self.sons = None - self._end_current_group: int = 0 + def __init__(self, regex: str) -> None: + self._current_node: Optional[Node] = None + self.head: Optional[Node] = None + self.sons: List[RegexReader] = [] + self._end_current_group = 0 regex = _pre_process_regex(regex) self._regex = regex self._components = _get_regex_componants(regex) self._pre_process_input_regex_componants() - self._setup_sons() self._setup_from_regex_componants() - def _remove_useless_extreme_parenthesis_from_components(self): + def _remove_useless_extreme_parenthesis_from_components(self) -> None: if self._begins_with_parenthesis_components(): self._remove_useless_extreme_parenthesis_from_componants() - def _pre_process_input_regex_componants(self): + def _pre_process_input_regex_componants(self) -> None: self._remove_useless_extreme_parenthesis_from_components() self._compute_precedence() self._remove_useless_extreme_parenthesis_from_components() def _remove_useless_extreme_parenthesis_from_componants( - self): + self) -> None: if self._is_surrounded_by_parenthesis(): self._components = self._components[1:-1] self._remove_useless_extreme_parenthesis_from_components() - def _is_surrounded_by_parenthesis(self): + def _is_surrounded_by_parenthesis(self) -> bool: parenthesis_depths = self._get_parenthesis_depths() first_complete_closing = _find_first_complete_closing_if_possible( parenthesis_depths) return first_complete_closing == len(self._components) - 1 - def _get_parenthesis_depths(self): + def _get_parenthesis_depths(self) -> List[int]: depths = [0] for component in self._components: depths.append(depths[-1] + _get_parenthesis_value(component)) @@ -61,7 +62,7 @@ def _get_parenthesis_depths(self): def _begins_with_parenthesis_components(self): return self._components[0] == "(" - def _setup_precedence_when_not_trivial(self): + def _setup_precedence_when_not_trivial(self) -> None: self._set_end_first_group_in_components() if self._end_current_group == len(self._components): self._current_node = None @@ -69,30 +70,30 @@ def _setup_precedence_when_not_trivial(self): self._current_node = to_node( self._components[self._end_current_group]) - def _setup_precedence(self): + def _setup_precedence(self) -> None: if len(self._components) <= 1: self._current_node = None else: self._setup_precedence_when_not_trivial() - def _found_no_union(self, next_node): + def _found_no_union(self, next_node: Optional[Node]) -> bool: return self._end_current_group < len( self._components) and not isinstance(next_node, Union) - def _add_parenthesis_around_part_of_componants(self, index_opening, - index_closing): + def _add_parenthesis_around_part_of_componants( + self, index_opening: int, index_closing: int) -> None: self._components.insert(index_opening, "(") # Add 1 as something was added before self._components.insert(index_closing + 1, ")") - def _compute_precedent_when_not_kleene_nor_union(self): + def _compute_precedent_when_not_kleene_nor_union(self) -> None: while self._found_no_union(self._current_node): self._set_next_end_group_and_node() if isinstance(self._current_node, Union): self._add_parenthesis_around_part_of_componants( 0, self._end_current_group) - def _compute_precedence(self): + def _compute_precedence(self) -> None: """ Add parenthesis for the first group in indicate precedence """ self._setup_precedence() if isinstance(self._current_node, KleeneStar): @@ -102,7 +103,7 @@ def _compute_precedence(self): elif not isinstance(self._current_node, Union): self._compute_precedent_when_not_kleene_nor_union() - def _set_next_end_group_and_node(self): + def _set_next_end_group_and_node(self) -> None: if isinstance(self._current_node, Operator) and not isinstance( self._current_node, KleeneStar): self._end_current_group += 1 @@ -111,7 +112,7 @@ def _set_next_end_group_and_node(self): self._current_node = to_node( self._components[self._end_current_group]) - def _set_end_first_group_in_components(self, idx_from=0): + def _set_end_first_group_in_components(self, idx_from: int = 0) -> None: """ Gives the end of the first group """ if idx_from >= len(self._components): self._end_current_group = idx_from @@ -130,7 +131,7 @@ def _set_end_first_group_in_components(self, idx_from=0): else: self._end_current_group = 1 + idx_from - def _setup_non_trivial_regex(self): + def _setup_non_trivial_regex(self) -> None: self._set_end_first_group_in_components() next_node = to_node(self._components[self._end_current_group]) if isinstance(next_node, KleeneStar): @@ -149,15 +150,15 @@ def _setup_non_trivial_regex(self): self.sons.append(self._process_sub_regex(begin_second_group, len(self._components))) - def _setup_empty_regex(self): + def _setup_empty_regex(self) -> None: self.head = to_node("") - def _setup_one_symbol_regex(self): + def _setup_one_symbol_regex(self) -> None: first_symbol = to_node(self._components[0]) self._check_is_valid_single_first_symbol(first_symbol) self.head = first_symbol - def _setup_from_regex_componants(self): + def _setup_from_regex_componants(self) -> None: if not self._components: self._setup_empty_regex() elif len(self._components) == 1: @@ -165,18 +166,15 @@ def _setup_from_regex_componants(self): else: self._setup_non_trivial_regex() - def _process_sub_regex(self, idx_from, idx_to): + def _process_sub_regex(self, idx_from: int, idx_to: int) -> "RegexReader": sub_regex = " ".join(self._components[idx_from:idx_to]) return self.from_string(sub_regex) - def _check_is_valid_single_first_symbol(self, first_symbol): + def _check_is_valid_single_first_symbol(self, first_symbol: Any) -> None: if not isinstance(first_symbol, Symbol): raise MisformedRegexError(MISFORMED_MESSAGE, self._regex) - def _setup_sons(self): - self.sons = [] - - def from_string(self, regex_str: str): + def from_string(self, regex_str: str) -> "RegexReader": """ Read a regex from a string Parameters @@ -192,7 +190,9 @@ def from_string(self, regex_str: str): return RegexReader(regex_str) -def _find_first_complete_closing_if_possible(parenthesis_depths, index_from=0): +def _find_first_complete_closing_if_possible( + parenthesis_depths: List[int], + index_from: int = 0) -> int: try: first_complete_closing = parenthesis_depths.index(0, index_from) except ValueError: @@ -200,7 +200,7 @@ def _find_first_complete_closing_if_possible(parenthesis_depths, index_from=0): return first_complete_closing -def _get_parenthesis_value(component): +def _get_parenthesis_value(component: str) -> int: if component == "(": return 1 if component == ")": @@ -212,8 +212,8 @@ def _pre_process_regex(regex: str) -> str: regex = regex.strip(" ") if regex.endswith("\\") and not regex.endswith("\\\\"): regex += " " - regex = re.sub(r" +", " ", regex) - regex = re.sub(r"\\ ", "\\ ", regex) + regex = sub(r" +", " ", regex) + regex = sub(r"\\ ", "\\ ", regex) if regex.endswith(" "): regex = regex[:-1] res = [] @@ -232,10 +232,10 @@ def _pre_process_regex(regex: str) -> str: return "".join(res) -def _get_regex_componants(regex): +def _get_regex_componants(regex: str) -> List[str]: temp = regex.split(" ") - for i, sub in enumerate(temp): - if sub.endswith("\\") and not sub.endswith("\\\\"): + for i, component in enumerate(temp): + if component.endswith("\\") and not component.endswith("\\\\"): temp[i] += " " if len(temp) > 1 and not temp[-1]: del temp[-1]