From 0b8480f5e1afcee08d2c25cc8548ad2c11b632de Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 19 Nov 2020 14:22:48 -0500 Subject: [PATCH 1/4] fix unclosed pi in code span --- markdown/extensions/md_in_html.py | 10 ++++++++++ markdown/htmlparser.py | 8 ++++++++ tests/test_syntax/blocks/test_html_blocks.py | 21 ++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 6d2a0e78d..7f249aa25 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -206,6 +206,16 @@ def handle_empty_tag(self, data, is_block): else: self.handle_data(self.md.htmlStash.store(data)) + def parse_pi(self, i): + if self.at_line_start() or self.intail or self.mdstack: + # The same override exists in HTMLExtractor without the check + # for mdstack. Therefore, use HTMLExtractor's parent instead. + return super(HTMLExtractor, self).parse_pi(i) + # This is not the beginning of a raw block so treat as plain data + # and avoid consuming any tags which may follow (see #1066). + self.handle_data('' if data.startswith('CDATA[') else ']>' self.handle_empty_tag(' + foo + + """ + ), + self.dedent( + """ +

<?php

+
+ foo +
+ """ + ) + ) + def test_raw_declaration_one_line(self): self.assertMarkdownRenders( '', From bfb109f36dd0b2265077f14a85757e50666789b4 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 20 Nov 2020 14:39:46 -0500 Subject: [PATCH 2/4] fix unclosed dec in code span --- markdown/extensions/md_in_html.py | 10 +++++ markdown/htmlparser.py | 9 +++++ tests/test_syntax/blocks/test_html_blocks.py | 42 ++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 7f249aa25..86cf00d79 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -216,6 +216,16 @@ def parse_pi(self, i): self.handle_data(' + foo + + """ + ), + self.dedent( + """ +

<!

+
+ foo +
+ """ + ) + ) + def test_raw_cdata_one_line(self): self.assertMarkdownRenders( '"); ]]>', @@ -1211,6 +1232,27 @@ def test_raw_cdata_indented(self): ) ) + def test_raw_cdata_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ + ` + foo + + """ + ), + self.dedent( + """ +

<![

+
+ foo +
+ """ + ) + ) + def test_charref(self): self.assertMarkdownRenders( '§', From 0807ffa29c5fa2f9bae7a21de5c2fbcff298fd4a Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 20 Nov 2020 15:30:19 -0500 Subject: [PATCH 3/4] fix unclosed tag in code span --- markdown/htmlparser.py | 17 +++++++- tests/test_syntax/blocks/test_html_blocks.py | 42 ++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index fea132b00..c08856ab8 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -39,6 +39,22 @@ # so the 'incomplete' functionality is unnecessary. As the entityref regex is run right before incomplete, # and the two regex are the same, then incomplete will simply never match and we avoid the logic within. htmlparser.incomplete = htmlparser.entityref +# Monkeypatch HTMLParser to not accept a backtick in a tag name, attribute name, or bare value. +htmlparser.locatestarttagend_tolerant = re.compile(r""" + <[a-zA-Z][^`\t\n\r\f />\x00]* # tag name <= added backtick here + (?:[\s/]* # optional whitespace before attribute name + (?:(?<=['"\s/])[^`\s/>][^\s/=>]* # attribute name <= added backtick here + (?:\s*=+\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^`>\s]* # bare value <= added backtick here + ) + (?:\s*,)* # possibly followed by a comma + )?(?:\s|/(?!>))* + )* + )? + \s* # trailing whitespace +""", re.VERBOSE) # Match a blank line at the start of a block of text (two newlines). # The newlines may be preceded by additional whitespace. @@ -246,7 +262,6 @@ def parse_html_declaration(self, i): self.handle_data('<foo

' ) + def test_raw_unclosed_tag_in_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ + ` + hello + + """ + ), + self.dedent( + """ +

<div.

+
+ hello +
+ """ + ) + ) + + def test_raw_unclosed_tag_in_code_span_space(self): + self.assertMarkdownRenders( + self.dedent( + """ + `
+ hello +
+ """ + ), + self.dedent( + """ +

<div.

+
+ hello +
+ """ + ) + ) + def test_raw_attributes(self): self.assertMarkdownRenders( '

text

', From f1063ed33fb92d9fbc0ccef972c8b934a88d8347 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 20 Nov 2020 15:31:48 -0500 Subject: [PATCH 4/4] release notes; --- docs/change_log/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 7edb2b9a6..0069c2225 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -5,6 +5,7 @@ Python-Markdown Change Log Under development: version 3.3.4 (a bug-fix release). +* Properly parse unclosed tags in code spans (#1066). * Properly parse processing instructions in md_in_html (#1070). * Properly parse code spans in md_in_html (#1069).