Skip to content

Commit

Permalink
Fix more escape sequences. (#449)
Browse files: browse the repository at this point in the history
  • Loading branch information
felixfontein authored Nov 2, 2024
1 parent c5dfbd1 commit ff15963
Show file tree
Hide file tree
Showing 9 changed files with 44 additions and 44 deletions.
2 changes: 1 addition & 1 deletion v7/import_jekyll/import_jekyll.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def link_repl(matchobj):

def slugify_file(filename):
name, _ = os.path.splitext(os.path.basename(filename))
m = re.match('\d+\-\d+\-\d+\-(?P<name>.*)', name)
m = re.match(r'\d+\-\d+\-\d+\-(?P<name>.*)', name)
if m:
name = m.group('name')

Expand Down
22 changes: 11 additions & 11 deletions v7/markmin/markmin/markmin2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

__all__ = ['render', 'markmin2html', 'markmin_escape']

__doc__ = """
__doc__ = r"""
# Markmin markup language
## About
Expand Down Expand Up @@ -548,10 +548,10 @@ def test():
regex_del = re.compile(r'~~(?P<t>[^\s*]+( +[^\s*]+)*)~~')
regex_em = re.compile(r"''(?P<t>([^\s']| |'(?!'))+)''")
regex_num = re.compile(r"^\s*[+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)?\s*$")
regex_list = re.compile('^(?:(?:(#{1,6})|(?:(\.+|\++|\-+)(\.)?))\s*)?(.*)$')
regex_bq_headline = re.compile('^(?:(\.+|\++|\-+)(\.)?\s+)?(-{3}-*)$')
regex_list = re.compile(r'^(?:(?:(#{1,6})|(?:(\.+|\++|\-+)(\.)?))\s*)?(.*)$')
regex_bq_headline = re.compile(r'^(?:(\.+|\++|\-+)(\.)?\s+)?(-{3}-*)$')
regex_tq = re.compile(
'^(-{3}-*)(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[a-zA-Z][_a-zA-Z\-\d]*)\])?)?$')
r'^(-{3}-*)(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[a-zA-Z][_a-zA-Z\-\d]*)\])?)?$')
regex_proto = re.compile(
r'(?<!["\w>/=])(?P<p>\w+):(?P<k>\w+://[\w\d\-+=?%&/:.]+)', re.M)
regex_auto = re.compile(
Expand All @@ -568,11 +568,11 @@ def test():
'\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05')
ttab_out = maketrans(
'\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05', "'`:*~\\[]{}@$+-.#\n")
regex_quote = re.compile('(?P<name>\w+?)\s*\=\s*')
regex_quote = re.compile(r'(?P<name>\w+?)\s*\=\s*')


def make_dict(b):
return '{%s}' % regex_quote.sub("'\g<name>':", b)
return '{%s}' % regex_quote.sub(r"'\g<name>':", b)


def safe_eval(node_or_string, env):
Expand Down Expand Up @@ -708,7 +708,7 @@ def render(text,
class_prefix='',
id_prefix='markmin_',
pretty_print=False):
"""
r"""
Arguments:
- text is the text to be processed
- extra is a dict like extra=dict(custom=lambda value: value) that process custom code
Expand Down Expand Up @@ -959,7 +959,7 @@ def render(text,
text = replace_at_urls(text, URL)

if latex == 'google':
text = regex_dd.sub('``\g<latex>``:latex ', text)
text = regex_dd.sub(r'``\g<latex>``:latex ', text)

#############################################################
# replace all blocks marked with ``...``:class[id] with META
Expand Down Expand Up @@ -1321,9 +1321,9 @@ def parse_table_or_blockquote(s, mtag, lineno):
#############################################################
# do strong,em,del
#############################################################
text = regex_strong.sub('<strong>\g<t></strong>', text)
text = regex_del.sub('<del>\g<t></del>', text)
text = regex_em.sub('<em>\g<t></em>', text)
text = regex_strong.sub(r'<strong>\g<t></strong>', text)
text = regex_del.sub(r'<del>\g<t></del>', text)
text = regex_em.sub(r'<em>\g<t></em>', text)

#############################################################
# deal with images, videos, audios and links
Expand Down
2 changes: 1 addition & 1 deletion v7/sphinx_roles/sphinx_roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def unknown_visit(self, node):
return [pnode], msg_list


_abbr_re = re.compile("\((.*)\)$", re.S)
_abbr_re = re.compile(r"\((.*)\)$", re.S)


class abbreviation(nodes.Inline, nodes.TextElement):
Expand Down
2 changes: 1 addition & 1 deletion v7/static_comments/static_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class StaticComments(SignalHandler):
"""Add static comments to posts."""

# Used to parse comment headers
_header_regex = re.compile('^\.\. (.*?): (.*)')
_header_regex = re.compile(r'^\.\. (.*?): (.*)')

def _compile_content(self, compiler_name, content, filename):
"""Compile comment content with specified page compiler."""
Expand Down
50 changes: 25 additions & 25 deletions v7/wordpress_compiler/wordpress/default_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,27 +87,27 @@ def __wptexturize_setup(self):

dynamic = []
if "'" != apos:
dynamic.append(('\'(\d\d(?:&#8217;|\')?s)', apos + '\\1')) # '99's
dynamic.append(('\'(\d)', apos + '\\1')) # '99
dynamic.append((r'\'(\d\d(?:&#8217;|\')?s)', apos + '\\1')) # '99's
dynamic.append((r'\'(\d)', apos + '\\1')) # '99
if "'" != opening_single_quote:
dynamic.append(('(\s|\A|[([{<]|")\'', '\\1' + opening_single_quote)) # opening single quote, even after (, {, <, [
dynamic.append((r'(\s|\A|[([{<]|")\'', '\\1' + opening_single_quote)) # opening single quote, even after (, {, <, [
if '"' != double_prime:
dynamic.append(('(\d)"', '\\1' + double_prime)) # 9" (double prime)
dynamic.append((r'(\d)"', '\\1' + double_prime)) # 9" (double prime)
if "'" != prime:
dynamic.append(('(\d)\'', '\\1' + prime)) # 9' (prime)
dynamic.append((r'(\d)\'', '\\1' + prime)) # 9' (prime)
if "'" != apos:
dynamic.append(('(\S)\'([^\'\s])', '\\1' + apos + '\\2')) # apostrophe in a word
dynamic.append((r'(\S)\'([^\'\s])', '\\1' + apos + '\\2')) # apostrophe in a word
if '"' != opening_quote:
dynamic.append(('(\s|\A|[([{<])"(?!\s)', '\\1' + opening_quote)) # opening double quote, even after (, {, <, [
dynamic.append((r'(\s|\A|[([{<])"(?!\s)', '\\1' + opening_quote)) # opening double quote, even after (, {, <, [
# PHP: the original PHP regular expression had a problem, since there was only one capturing group, but both \1 and \2 were
# used on the right-hand side. Since Python throws an exception in that case, while PHP simply treats \2 as an empty string,
# I had to remove the "+'\\2'" after opening_quote.
if '"' != closing_quote:
dynamic.append(('"(\s|\S|\Z)', closing_quote + '\\1')) # closing double quote
dynamic.append((r'"(\s|\S|\Z)', closing_quote + '\\1')) # closing double quote
if "'" != closing_single_quote:
dynamic.append(('\'([\s.]|\Z)', closing_single_quote + '\\1')) # closing single quote
dynamic.append((r'\'([\s.]|\Z)', closing_single_quote + '\\1')) # closing single quote

dynamic.append(('\b(\d+)x(\d+)\b', '\\1&#215;\\2')) # 9x9 (times)
dynamic.append((r'\b(\d+)x(\d+)\b', '\\1&#215;\\2')) # 9x9 (times)

self.dynamic = dynamic

Expand Down Expand Up @@ -144,7 +144,7 @@ def wptexturize(self, text):
no_texturize_shortcodes_stack = []

# PHP: Since Python doesn't support PHP's /U modifier (which inverts quantifier's greediness), I modified the regular expression accordingly
textarr = regex.split('(<.*?>|\[.*?\])', text, flags=regex.DOTALL)
textarr = regex.split(r'(<.*?>|\[.*?\])', text, flags=regex.DOTALL)

result = []
for curl in textarr:
Expand Down Expand Up @@ -240,8 +240,8 @@ def __convert_chars_setup(self):

def convert_chars(self, content):
# Remove metadata tags
content = regex.sub('<title>(.+?)<\/title>', '', content)
content = regex.sub('<category>(.+?)<\/category>', '', content)
content = regex.sub(r'<title>(.+?)<\/title>', '', content)
content = regex.sub(r'<category>(.+?)<\/category>', '', content)

# Converts lone & characters into &#38; (a.k.a. &amp;)
content = regex.sub('&([^#])(?![a-z1-4]{1,8};)', '&#038;\\1', content, regex.IGNORECASE)
Expand Down Expand Up @@ -288,36 +288,36 @@ def wpautop(self, pee, br=True):

pee += last_pee

pee = regex.sub('<br />\s*<br />', "\n\n", pee)
pee = regex.sub(r'<br />\s*<br />', "\n\n", pee)
# Space things out a little
self.allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'
pee = regex.sub('(<' + self.allblocks + '[^>]*>)', "\n\\1", pee)
pee = regex.sub('(</' + self.allblocks + '>)', "\\1\n\n", pee)
pee = pee.replace("\r\n", "\n").replace("\r", "\n") # cross-platform newlines
if pee.find('<object') >= 0:
pee = regex.sub('\s*<param([^>]*)>\s*', "<param\\1>", pee) # no pee inside object/embed
pee = regex.sub('\s*</embed>\s*', '</embed>', pee)
pee = regex.sub(r'\s*<param([^>]*)>\s*', "<param\\1>", pee) # no pee inside object/embed
pee = regex.sub(r'\s*</embed>\s*', '</embed>', pee)
pee = regex.sub("\n\n+", "\n\n", pee) # take care of duplicates
# make paragraphs, including one at the end
pees = regex.split('\n\s*\n', pee)
pees = regex.split('\n\\s*\n', pee)
pee = ''
for trinkle in pees:
if len(trinkle) > 0: # PHP: this emulates PHP's flag PREG_SPLIT_NO_EMPTY for preg_split()
pee += '<p>' + trinkle.strip("\n") + "</p>\n"
pee = regex.sub('<p>\s*</p>', '', pee) # under certain strange conditions it could create a P of entirely whitespace
pee = regex.sub(r'<p>\s*</p>', '', pee) # under certain strange conditions it could create a P of entirely whitespace
pee = regex.sub('<p>([^<]+)</(div|address|form)>', "<p>\\1</p></\\2>", pee)
pee = regex.sub('<p>\s*(</?' + self.allblocks + '[^>]*>)\s*</p>', "\\1", pee) # don't pee all over a tag
pee = regex.sub(r'<p>\s*(</?' + self.allblocks + r'[^>]*>)\s*</p>', "\\1", pee) # don't pee all over a tag
pee = regex.sub("<p>(<li.+?)</p>", "\\1", pee) # problem with nested lists
pee = regex.sub('<p><blockquote([^>]*)>', "<blockquote\\1><p>", pee, regex.IGNORECASE)
pee = pee.replace('</blockquote></p>', '</p></blockquote>')
pee = regex.sub('<p>\s*(</?' + self.allblocks + '[^>]*>)', "\\1", pee)
pee = regex.sub('(</?' + self.allblocks + '[^>]*>)\s*</p>', "\\1", pee)
pee = regex.sub(r'<p>\s*(</?' + self.allblocks + '[^>]*>)', "\\1", pee)
pee = regex.sub('(</?' + self.allblocks + r'[^>]*>)\s*</p>', "\\1", pee)
if br:
pee = php.preg_replace_callback('<(script|style).*?<\/\\1>', lambda x: self.__autop_newline_preservation_helper(x), pee, regex.DOTALL)
pee = regex.sub('(?<!<br />)\s*\n', "<br />\n", pee) # optionally make line breaks
pee = php.preg_replace_callback('<(script|style).*?<\\/\\1>', lambda x: self.__autop_newline_preservation_helper(x), pee, regex.DOTALL)
pee = regex.sub('(?<!<br />)\\s*\n', "<br />\n", pee) # optionally make line breaks
pee = pee.replace('<WPPreserveNewline />', "\n")
pee = regex.sub('(</?' + self.allblocks + '[^>]*>)\s*<br />', "\\1", pee)
pee = regex.sub('<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)', '\\1', pee)
pee = regex.sub('(</?' + self.allblocks + r'[^>]*>)\s*<br />', "\\1", pee)
pee = regex.sub(r'<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)', '\\1', pee)
pee = regex.sub("\n</p>$", '</p>', pee)

if len(pre_tags) > 0:
Expand Down
4 changes: 2 additions & 2 deletions [file path missing from this capture]
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def __init__(self):

def _filter_code_tags(self, text, context):
result = ''
for piece in regex.split('(\[code(?:|\s+language="[^"]*?")\].*?\[/code\])', text, flags=regex.DOTALL | regex.IGNORECASE):
match = regex.match('\[code(?:|\s+language="([^"]*?)")\](.*?)\[/code\]', piece, flags=regex.DOTALL | regex.IGNORECASE)
for piece in regex.split(r'(\[code(?:|\s+language="[^"]*?")\].*?\[/code\])', text, flags=regex.DOTALL | regex.IGNORECASE):
match = regex.match(r'\[code(?:|\s+language="([^"]*?)")\](.*?)\[/code\]', piece, flags=regex.DOTALL | regex.IGNORECASE)
if match is not None:
the_id = str(context.inc_plugin_counter('wordpress_shortcode_code', 'counter'))
context.store_plugin_data('wordpress_shortcode_code', the_id, (match.group(2), match.group(1)))
Expand Down
2 changes: 1 addition & 1 deletion v7/wordpress_compiler/wordpress/shortcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def unregister_shortcode(self, tag):
del self._shorcode_tags[tag]

def _extract_arguments(self, argsString):
pattern = '(\w+)\s*=\s*"([^"]*)"(?:\s|$)|(\w+)\s*=\s*\'([^\']*)\'(?:\s|$)|(\w+)\s*=\s*([^\s\'"]+)(?:\s|$)|"([^"]*)"(?:\s|$)|(\S+)(?:\s|$)'
pattern = r'(\w+)\s*=\s*"([^"]*)"(?:\s|$)|(\w+)\s*=\s*\'([^\']*)\'(?:\s|$)|(\w+)\s*=\s*([^\s\'"]+)(?:\s|$)|"([^"]*)"(?:\s|$)|(\S+)(?:\s|$)'
argsString = regex.sub("[\u00A0\u200B]+", " ", argsString)
matches = regex.findall(pattern, argsString)
if len(matches) > 0:
Expand Down
2 changes: 1 addition & 1 deletion v8/markmin/markmin/markmin2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def test():
"\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05",
"'`:*~\\[]{}@$+-.#\n",
)
regex_quote = re.compile("(?P<name>\w+?)\s*\=\s*")
regex_quote = re.compile(r"(?P<name>\w+?)\s*\=\s*")


def make_dict(b):
Expand Down
2 changes: 1 addition & 1 deletion v8/webmentions/webmentions.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def check_link_header_for_webmention(self, header):
"""Process a header and look for webmention related entries"""

regexes = [
"<(.[^>]+)>;\s+rel\s?=\s?[\"']?(http:\/\/)?webmention(\.org)?\/?[\"']?"
r"<(.[^>]+)>;\s+rel\s?=\s?[\"']?(http:\/\/)?webmention(\.org)?\/?[\"']?"
]

if "webmention" not in header:
Expand Down

0 comments on commit ff15963

Please sign in to comment.