Skip to content

Commit

Permalink
Add precedence system
Browse files Browse the repository at this point in the history
  • Loading branch information
GrandMoff100 authored Apr 16, 2024
1 parent 60820aa commit 2184608
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 8 deletions.
4 changes: 3 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
release = __version__
# Versions whose string ends in "a", "b" or "rc" (presumably pre-release
# markers -- confirm against the project's versioning scheme) map to the
# "master" branch; every other version maps to "v" + version.
# str.endswith accepts a tuple of suffixes, so one call covers all three.
branch = (
    "master"
    if __version__.endswith(("a", "b", "rc"))
    else "v" + __version__
)

Expand Down
3 changes: 2 additions & 1 deletion examples/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
port = Optional(Group(RegexPattern(":") + Amount(DIGIT, 1, or_more=True)))
path = Amount(
Group(
RegexPattern("/") + Group(Amount(NotSet("/", "#", "?", "&", WHITESPACE), 0, or_more=True))
RegexPattern("/")
+ Group(Amount(NotSet("/", "#", "?", "&", WHITESPACE), 0, or_more=True))
),
0,
or_more=True,
Expand Down
4 changes: 2 additions & 2 deletions regexfactory/chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
ANY = RegexPattern(r".")

#: (Caret.) Matches the start of the string, and in :data:`re.MULTILINE` mode
#: also matches immediately after each newline.
ANCHOR_START = RegexPattern(r"^", _precedence=2)

#: Matches the end of the string or just before the newline at the end of the
#: string, and in :data:`re.MULTILINE` mode also matches before a newline.
#: :code:`foo` matches both :code:`foo` and :code:`foobar`, while the regular
#: expression :code:`foo$` matches only :code:`foo`. More interestingly,
#: searching for :code:`foo.$` in :code:`foo1\nfoo2\n` matches :code:`foo2`
#: normally, but :code:`foo1` in :data:`re.MULTILINE` mode; searching for a
#: single :code:`$` in :code:`foo\n` will find two (empty) matches: one just
#: before the newline, and one at the end of the string.
ANCHOR_END = RegexPattern(r"$", _precedence=2)

#: Matches Unicode whitespace characters (which includes
#: :code:`[ \t\n\r\f\v]`, and also many other characters, for example the
#: non-breaking spaces mandated by typography rules in many languages). If the
#: :data:`re.ASCII` flag is used, only :code:`[ \t\n\r\f\v]` is matched.
WHITESPACE = RegexPattern(r"\s")
Expand Down
34 changes: 30 additions & 4 deletions regexfactory/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,13 @@ class RegexPattern:

regex: str

#: The precedence of the pattern. Higher precedence patterns are evaluated first.
# Precedence order reference (POSIX regular expressions):
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04_08
precedence: int

def __init__(self, pattern: ValidPatternType, /, _precedence: int = 1) -> None:
    """Build a pattern from *pattern* and record its precedence.

    :param pattern: Value the regex is derived from; it is converted with
        ``get_regex``.
    :param _precedence: Precedence given to the new pattern. Ignored when
        *pattern* is already a :class:`RegexPattern`; in that case the
        precedence of *pattern* is copied instead.
    """
    self.regex = self.get_regex(pattern)
    if isinstance(pattern, RegexPattern):
        # Re-wrapping an existing pattern keeps the precedence it already had.
        self.precedence = pattern.precedence
    else:
        self.precedence = _precedence

def __repr__(self) -> str:
raw_regex = f"{self.regex!r}".replace("\\\\", "\\")
Expand All @@ -47,12 +52,33 @@ def __str__(self) -> str:

def __add__(self, other: ValidPatternType) -> "RegexPattern":
    """Concatenate ``self + other`` into a new :class:`RegexPattern`.

    When the two operands have different precedence, the one with the lower
    precedence is wrapped in a ``Group(..., capturing=False)`` before the
    regexes are joined; with equal precedence they are joined as-is.

    :param other: Right-hand operand; anything that is not already a
        :class:`RegexPattern` is first passed to the constructor.
    :returns: The combined pattern, or ``NotImplemented`` when building a
        :class:`RegexPattern` from *other* raises :class:`TypeError`.
    """
    from .patterns import Group  # imported here to avoid a circular import

    try:
        rhs = other if isinstance(other, RegexPattern) else RegexPattern(other)
    except TypeError:
        return NotImplemented

    if self.precedence > rhs.precedence:
        # Right operand ranks lower: group it.
        combined = self.regex + self.get_regex(Group(rhs, capturing=False))
    elif self.precedence < rhs.precedence:
        # Left operand ranks lower: group it.
        combined = self.get_regex(Group(self, capturing=False)) + rhs.regex
    else:
        combined = self.regex + rhs.regex
    return RegexPattern(combined)

def __radd__(self, other: ValidPatternType) -> "RegexPattern":
    """Concatenate ``other + self`` when *other* is the left-hand operand.

    This is the reflected form of addition, so *other* contributes the left
    part of the resulting regex and ``self`` the right part. When the two
    operands have different precedence, the one with the lower precedence is
    wrapped in a ``Group(..., capturing=False)`` before the regexes are
    joined; with equal precedence they are joined as-is.

    :param other: Left-hand operand; anything that is not already a
        :class:`RegexPattern` is first passed to the constructor.
    :returns: The combined pattern, or ``NotImplemented`` when building a
        :class:`RegexPattern` from *other* raises :class:`TypeError`.
    """
    from .patterns import Group  # prevent circular import

    try:
        other_pattern = other if isinstance(other, RegexPattern) else RegexPattern(other)
    except TypeError:
        return NotImplemented

    if self.precedence > other_pattern.precedence:
        # Left operand (other) ranks lower: group it.
        return RegexPattern(self.get_regex(Group(other_pattern, capturing=False)) + self.regex)
    if self.precedence < other_pattern.precedence:
        # Right operand (self) ranks lower: group it.
        return RegexPattern(other_pattern.regex + self.get_regex(Group(self, capturing=False)))
    # Equal precedence: plain concatenation, keeping other on the left.
    return RegexPattern(other_pattern.regex + self.regex)

def __mul__(self, coefficient: int) -> "RegexPattern":
"""Treats :class:`RegexPattern` as a string and multiplies it by an integer."""
Expand All @@ -64,7 +90,7 @@ def __eq__(self, other: Any) -> bool:
Otherwise return false.
"""
if isinstance(other, (str, re.Pattern, RegexPattern)):
return self.regex == self.get_regex(other)
return self.regex == RegexPattern(other).regex and self.precedence == RegexPattern(other).precedence
return super().__eq__(other)

def __hash__(self) -> int:
Expand Down

0 comments on commit 2184608

Please sign in to comment.