Skip to content

Commit

Permalink
fix tag regexp to match quoted groups correctly
Browse files Browse the repository at this point in the history
Fixed issue in lexer where the regexp used to match tags would not
correctly interpret quoted sections individually. While this parsing issue
still produced the same expected tag structure later on, the mis-handling
of quoted sections was also subject to a regexp crash if a tag had a large
number of quotes within its quoted sections.

Fixes: #366
Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0
  • Loading branch information
zzzeek committed Aug 29, 2022
1 parent 7c5b28a commit 9257602
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 8 deletions.
9 changes: 9 additions & 0 deletions doc/build/unreleased/366.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.. change::
:tags: bug, lexer
:tickets: 366

Fixed issue in lexer where the regexp used to match tags would not
correctly interpret quoted sections individually. While this parsing issue
still produced the same expected tag structure later on, the mis-handling
of quoted sections was also subject to a regexp crash if a tag had a large
number of quotes within its quoted sections.
12 changes: 8 additions & 4 deletions mako/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,20 +272,24 @@ def parse(self):
return self.template

def match_tag_start(self):
match = self.match(
r"""
reg = r"""
\<% # opening tag
([\w\.\:]+) # keyword
((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \
((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \
# sign, string expression
# comma is for backwards compat
# identified in #366
\s* # more whitespace
(/)?> # closing
""",
"""

match = self.match(
reg,
re.I | re.S | re.X,
)

Expand Down
21 changes: 17 additions & 4 deletions test/test_lexer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import re

import pytest

from mako import compat
from mako import exceptions
from mako import parsetree
Expand Down Expand Up @@ -146,6 +148,10 @@ def test_noexpr_allowed(self):
"""
assert_raises(exceptions.CompileException, Lexer(template).parse)

def test_tag_many_quotes(self):
# Regression test for issue #366: a tag opener followed by 3000 double
# quote characters must be reported as a SyntaxException by the lexer,
# rather than crashing the regexp used to match tags.
template = "<%0" + '"' * 3000
assert_raises(exceptions.SyntaxException, Lexer(template).parse)

def test_unmatched_tag(self):
template = """
<%namespace name="bar">
Expand Down Expand Up @@ -432,9 +438,16 @@ def test_expr_in_attribute(self):
),
)

def test_pagetag(self):
template = """
<%page cached="True", args="a, b"/>
@pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)])
def test_pagetag(self, comma, numchars):
# note that the comma here looks like:
# <%page cached="True", args="a, b"/>
# that's what this test has looked like for decades; however, the
# comma there is not actually the right syntax. When issue #366
# was fixed, the tag regexp was altered to accommodate this comma
# for backwards compatibility.
template = f"""
<%page cached="True"{comma} args="a, b"/>
some template
"""
Expand All @@ -453,7 +466,7 @@ def test_pagetag(self):
some template
""",
(2, 48),
(2, numchars),
),
],
),
Expand Down

0 comments on commit 9257602

Please sign in to comment.