Skip to content

Commit

Permalink
py3: use raw strings for regexes
Browse files Browse the repository at this point in the history
Fixes linting issues and simplifies some patterns by removing unneeded
escaping.
  • Loading branch information
julen committed Jan 22, 2020
1 parent d47a758 commit 6637ada
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 76 deletions.
2 changes: 1 addition & 1 deletion pootle/apps/accounts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class User(AbstractBaseUser):
help_text=_('Required. 30 characters or fewer. Letters, numbers and '
'@/./+/-/_ characters'),
validators=[
RegexValidator(re.compile('^[\w.@+-]+$'),
RegexValidator(re.compile(r'^[\w.@+-]+$'),
_('Enter a valid username.'),
'invalid')
],
Expand Down
96 changes: 48 additions & 48 deletions pootle/apps/pootle_misc/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,58 +125,58 @@

# pre-compile all regexps

fmt = u"\{\d+(?:,(?:number|date|time|choice))\}"
fmt_esc = u"\\\{\d+\\\}"
fmt = r"{\d+(?:,(?:number|date|time|choice))}"
fmt_esc = r"\\{\d+\\}"
java_format_regex = re.compile(u"(%s|%s)" % (fmt, fmt_esc))

fmt = u"\$\{[a-zA-Z_\d\.\:]+\}"
fmt = r"\${[a-zA-Z_\d\.\:]+}"
template_format_regex = re.compile(u"(%s)" % fmt)

fmt = u"%\d+\$[a-z]+"
fmt = r"%\d+\$[a-z]+"
android_format_regex = re.compile(u"(%s)" % fmt)

fmt = u"%@|%\d+\$@"
fmt = r"%@|%\d+\$@"
objective_c_format_regex = re.compile(u"(%s)" % fmt)

fmt = u"\\\\u[a-fA-F0-9]{4}"
# Java-style encoded code point such as `\u00e9`: ONE literal backslash, `u`
# and four hex digits. In a raw string a literal backslash is spelled `\\`;
# the four-backslash spelling is only correct in the non-raw literal above,
# where Python itself halves the backslashes before `re` sees them.
fmt = r"\\u[a-fA-F0-9]{4}"
javaencoded_unicode_regex = re.compile(u"(%s)" % fmt)

fmt = u"\$[a-zA-Z_\d]+?(?![\$\%])"
fmt = r"\$[a-zA-Z_\d]+?(?![$%])"
dollar_sign_placeholders_regex = re.compile(u"(%s)" % fmt)

fmt = u"\$[a-zA-Z_\d]+?\$"
fmt = r"\$[a-zA-Z_\d]+?\$"
dollar_sign_closure_placeholders_regex = re.compile(u"(%s)" % fmt)

fmt = u"\%\%[a-zA-Z_\d]+?\%\%"
fmt = r"%%[a-zA-Z_\d]+?%%"
percent_sign_closure_placeholders_regex = re.compile(u"(%s)" % fmt)

fmt = u"\%[a-zA-Z_]+?(?![\$\%])"
fmt = r"%[a-zA-Z_]+?(?![$%])"
percent_sign_placeholders_regex = re.compile(u"(%s)" % fmt)

fmt = u"[A-Z_][A-Z0-9]*_[A-Z0-9_]*(?![a-z])"
fmt = r"[A-Z_][A-Z0-9]*_[A-Z0-9_]*(?![a-z])"
uppercase_placeholders_regex = re.compile(u"(%s)" % fmt)

fmt4 = u"\{{1}\d+,[^\}]+\}{1}"
fmt3 = u"\{{3}\S+?\}{3}"
fmt2 = u"\{{2}\S+?\}{2}"
fmt1 = u"\{{1}\S+?\}{1}"
fmt4 = r"{{1}\d+,[^}]+}{1}"
fmt3 = r"{{3}\S+?}{3}"
fmt2 = r"{{2}\S+?}{2}"
fmt1 = r"{{1}\S+?}{1}"

mustache_placeholders_regex = re.compile(
u"(%s|%s|%s|%s)" % (fmt4, fmt3, fmt2, fmt1))

mustache_placeholder_pairs_open_tag_regex = re.compile(
u"\{{2}[#\^][^\}]+\}{2}")
fmt = u"\{{2}[#\^\/][^\}]+\}{2}"
r"{{2}[#^][^}]+}{2}")
fmt = r"{{2}[#^/][^}]+}{2}"
mustache_placeholder_pairs_regex = re.compile(u"(%s)" % fmt)

fmt = u"\{{2}[\/]?[^\}]+\}{2}"
fmt = r"{{2}/?[^}]+}{2}"
mustache_like_placeholder_pairs_regex = re.compile(u"(%s)" % fmt)

# date_format
df_blocks = u"|".join(
map(lambda x: '%s+' % x, 'GyYMwWDdFEuaHkKhmsSzZX')) + u"|\'[\w]+\'"
df_glued_blocks = u"X+|Z+|\'[\w]*\'"
df_delimiter = u"[^\w']+|\'[\w]*\'"
df_blocks = "|".join(
map(lambda x: '%s+' % x, 'GyYMwWDdFEuaHkKhmsSzZX')) + r"|'[\w]+'"
df_glued_blocks = r"X+|Z+|'[\w]*'"
df_delimiter = r"[^\w']+|'[\w]*'"
date_format_regex = re.compile(
u"^(%(blocks)s)(%(glued_blocks)s)?((%(delimiter)s)+(%(blocks)s))*$" % {
'blocks': df_blocks,
Expand All @@ -185,63 +185,63 @@
})
date_format_exception_regex = re.compile(u"^(M|S|W|F)$", re.I)

fmt = u"^\s+|\s+$"
fmt = r"^\s+|\s+$"
whitespace_regex = re.compile(u"(%s)" % fmt)

fmt = u"&#\d+;|&[a-zA-Z]+;|&#x[0-9a-fA-F]+;"
fmt = r"&#\d+;|&[a-zA-Z]+;|&#x[0-9a-fA-F]+;"
escaped_entities_regex = re.compile(u"(%s)" % fmt)
broken_ampersand_regex = re.compile(u"(&[^#a-zA-Z]+)")

img_banner_regex = re.compile(u'^\<img src="\/images\/account\/bnr_')
img_banner_regex = re.compile(r'^<img src="/images/account/bnr_')

fmt1 = u"\b(?!alt|placeholder|title)[a-zA-Z_\d]+\s*=\s*'(?:.*?)'"
fmt2 = u'\b(?!alt|placeholder|title)[a-zA-Z_\d]+\s*=\s*"(?:.*?)"'
fmt1 = r"\b(?!alt|placeholder|title)[a-zA-Z_\d]+\s*=\s*'(?:.*?)'"
fmt2 = r'\b(?!alt|placeholder|title)[a-zA-Z_\d]+\s*=\s*"(?:.*?)"'
changed_attributes_regex = re.compile(u"(%s|%s)" % (fmt2, fmt1))

fmt = u"%[\d]*(?:.\d+)*(?:h|l|I|I32|I64)*[cdiouxefgns]"
fmt = r"%[\d]*(?:.\d+)*(?:h|l|I|I32|I64)*[cdiouxefgns]"
c_format_regex = re.compile(u"(%s)" % fmt)

fmt = u"[\000-\011\013-\037]"
fmt = r"[\000-\011\013-\037]"
non_printable_regex = re.compile(u"(%s)" % fmt)

fmt = u"[\<\>]"
fmt = r"[<>]"
unbalanced_tag_braces_regex = re.compile(u"(%s)" % fmt)

fmt = u"[\{\}]"
fmt = r"[{}]"
unbalanced_curly_braces_regex = re.compile(u"(%s)" % fmt)

fmt = u'^<(Sync Required|None|no attributes|no tags|' + \
u'no saved|searches|notebook|not available)>$'
no_tags_regex = re.compile(fmt)

fmt = u"<\/?[a-zA-Z_]+.*?>"
cdata_fmt = u'<!\[CDATA\[(?:[^]]|\](?!\]>))*\]\]>'
fmt = r"</?[a-zA-Z_]+.*?>"
cdata_fmt = r'<!\[CDATA\[(?:[^]]|\](?!\]>))*\]\]>'
tags_differ_regex_0 = re.compile(u"(%s|%s)" % (fmt, cdata_fmt))
tags_differ_regex_1 = re.compile(u"<(\/?[a-zA-Z_]+).*?>")
tags_differ_regex_1 = re.compile(r"<(/?[a-zA-Z_]+).*?>")

accelerators_regex_0 = re.compile(u"&(\w+);")
fmt = u"[&_\^]"
accelerators_regex_1 = re.compile(u"(%s)(?=\w)" % fmt)
accelerators_regex_0 = re.compile(r"&(\w+);")
fmt = r"[&_^]"
accelerators_regex_1 = re.compile(r"(%s)(?=\w)" % fmt)

fmt = u"&#?[0-9a-zA-Z]+;?"
fmt = r"&#?[0-9a-zA-Z]+;?"
broken_entities_regex_0 = re.compile(u"(%s)" % fmt)
entities = [
'amp', 'deg', 'frac14', 'frac12', 'frac34', 'lt', 'gt', 'nbsp', 'mdash',
'ndash', 'hellip', 'laquo', 'raquo', 'ldquo', 'rdquo', 'lsquo', 'rsquo',
'larr', 'rarr'
]
broken_entities_regex_1 = re.compile(u"^&(%s)$" % '|'.join(entities))
broken_entities_regex_2 = re.compile(u"^&#x?[0-9a-fA-F]+$")
broken_entities_regex_3 = re.compile(u"&\d+;")
broken_entities_regex_4 = re.compile(u"&x[0-9a-fA-F]+;")
broken_entities_regex_5 = re.compile(u"&#([^x\d])([0-9a-fA-F]+);")
broken_entities_regex_6 = re.compile(u"&#(\d+);")
broken_entities_regex_7 = re.compile(u"&#x([a-zA-Z_]+);")
broken_entities_regex_1 = re.compile(r"^&(%s)$" % '|'.join(entities))
broken_entities_regex_2 = re.compile(r"^&#x?[0-9a-fA-F]+$")
broken_entities_regex_3 = re.compile(r"&\d+;")
broken_entities_regex_4 = re.compile(r"&x[0-9a-fA-F]+;")
broken_entities_regex_5 = re.compile(r"&#([^x\d])([0-9a-fA-F]+);")
broken_entities_regex_6 = re.compile(r"&#(\d+);")
broken_entities_regex_7 = re.compile(r"&#x([a-zA-Z_]+);")

fmt = u"[$%_@]"
potential_placeholders_regex = re.compile(u"(%s)" % fmt)

fmt = u"\%\{{1}[^\}]+\}{1}"
fmt = r"%{{1}[^}]+}{1}"
percent_brace_placeholders_regex = re.compile(u"(%s)" % fmt)

plurr_format_regex = re.compile(u'{[^{}]*:.*?}')
Expand Down Expand Up @@ -758,7 +758,7 @@ def get_fingerprint(string, is_source=False, translation=''):
underscore_count = 0
circumflex_count = 0

regex = re.compile(u"\001(\w+)\001")
regex = re.compile(r"\001(\w+)\001")
for chunk in chunks:
translate = not translate
if translate:
Expand Down Expand Up @@ -830,7 +830,7 @@ def get_fingerprint(string, is_source=False, translation=''):
# something else) for a hexadecimal entity
mo = broken_entities_regex_5.match(chunk)
if mo:
regex = re.compile(u"\D")
regex = re.compile(r"\D")
if regex.match(mo.group(1)) or regex.match(mo.group(2)):
fingerprint += 1

Expand Down
2 changes: 1 addition & 1 deletion pootle/apps/pootle_misc/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from translate.search import match, terminology


delimiters = re.compile(u"[\W]+")
delimiters = re.compile(r"[\W]+")


class Matcher(match.terminologymatcher):
Expand Down
2 changes: 1 addition & 1 deletion pootle/apps/pootle_store/templatetags/store_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
register = template.Library()


IMAGE_URL_RE = re.compile("(https?://[^\s]+\.(png|jpe?g|gif))", re.IGNORECASE)
# Matches http(s) URLs ending in an image extension (case-insensitive).
# Group 1 is the whole URL, group 2 the extension. The dot before the
# extension must stay escaped: an unescaped `.` matches any character, so
# e.g. "http://host/fooXpng" would be accepted as an image URL.
IMAGE_URL_RE = re.compile(r"(https?://[^\s]+\.(png|jpe?g|gif))", re.IGNORECASE)


@register.filter
Expand Down
2 changes: 1 addition & 1 deletion pootle/apps/reports/reporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def filepath(self):
def clean_config_value(self, value):
if not isinstance(value, str):
return value
return re.sub('\n\s+', '\n', value)
return re.sub(r'\n\s+', r'\n', value)

def get_invoice_data(self, invoice):
"""Gets individual invoice data."""
Expand Down
2 changes: 1 addition & 1 deletion pootle/core/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class MathCaptchaForm(forms.Form):
http://www.mysoftparade.com/blog/improved-mathematical-captcha/
"""

A_RE = re.compile("^(\d+)$")
A_RE = re.compile(r"^(\d+)$")

captcha_answer = forms.CharField(
max_length=2, required=True,
Expand Down
46 changes: 23 additions & 23 deletions pootle/core/utils/wordcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,36 @@
re._MAXCACHE = 1000


remove = re.compile(u"[\.]+") # dots
delimiters = re.compile(u"[\W]+") # anything except a-z, A-Z and _
delimiters_begin = re.compile(u"^[\W]+") # anything except a-z, A-Z and _
delimiters_end = re.compile(u"[\W]+$") # anything except a-z, A-Z and _
# `\W` is the complement of `\w`, so digits (and, for str patterns, non-ASCII
# letters) count as word characters and are never treated as delimiters.
remove = re.compile(r"[.]+") # runs of dots
delimiters = re.compile(r"[\W]+") # runs of non-word characters
delimiters_begin = re.compile(r"^[\W]+") # non-word characters at the start
delimiters_end = re.compile(r"[\W]+$") # non-word characters at the end

english_date = re.compile(
u"(^|\W)(January|February|March|April|May|June|July|August|September|"
u"October|November|December)\s+\d{1,2},\s+(?:\d{2})?\d{2}(\W|$)"
r"(^|\W)(January|February|March|April|May|June|July|August|September|"
r"October|November|December)\s+\d{1,2},\s+(?:\d{2})?\d{2}(\W|$)"
)

escaped_xmltag_regex = re.compile(u'(&lt;\/?[\w]+.*?>)')
xmltag_regex = re.compile(u'(<\/?[\w]+.*?>)')
java_format_regex = re.compile(u'(\\\{\d+\\\}|\{\d+\})')
template_format_regex = re.compile(u'(\$\{[\w\.\:]+\})')
android_format_regex = re.compile(u'(%\d\$\w)')
sprintf_regex = re.compile(u'(%[\d]*(?:.\d+)*(?:h|l|I|I32|I64)*[cdiouxefgns])')
objective_c_regex = re.compile(u'(%@)')
dollar_sign_regex = re.compile(u'(\$[\w\d]+?\$)')
persent_sign_regex = re.compile(u'(\%[\w\d]+?\%)')
newline_regex = re.compile(u'(\{\\\n\})')
escaping_sqc_regex = re.compile(u'(\\\+[rnt])')
xml_entities_regex = re.compile(u'(&#\d+;|&\w+;)')
# Raw-string spellings of the patterns above. Each pattern wraps its whole
# match in a single capturing group. When converting from a non-raw literal,
# remember that Python already collapsed `\\` to one backslash before `re`
# saw the pattern, so the raw form needs fewer backslashes, not the same
# number.
escaped_xmltag_regex = re.compile(r'(&lt;/?[\w]+.*?>)')
xmltag_regex = re.compile(r'(</?[\w]+.*?>)')
java_format_regex = re.compile(r'(\\{\d+\\}|{\d+})')
template_format_regex = re.compile(r'(\${[\w.:]+})')
android_format_regex = re.compile(r'(%\d\$\w)')
# NOTE(review): the `.` in `(?:.\d+)*` is unescaped and matches any
# character; it looks like it was meant to be a literal precision dot.
# Left as-is to preserve existing matching -- confirm before changing.
sprintf_regex = re.compile(r'(%[\d]*(?:.\d+)*(?:h|l|I|I32|I64)*[cdiouxefgns])')
objective_c_regex = re.compile(r'(%@)')
dollar_sign_regex = re.compile(r'(\$[\w\d]+?\$)')
persent_sign_regex = re.compile(r'(%[\w\d]+?%)')
# `{`, one real newline character, `}` -- same as the non-raw literal, whose
# `\\` + `\n` reached `re` as an escaped (i.e. literal) newline, not as a
# backslash followed by a newline.
newline_regex = re.compile(r'({\n})')
# One or more backslashes followed by r, n or t (e.g. `\n`, `\\t`). The `+`
# is a quantifier on the backslash, not a literal plus sign.
escaping_sqc_regex = re.compile(r'(\\+[rnt])')
xml_entities_regex = re.compile(r'(&#\d+;|&\w+;)')
product_names_regex = re.compile(
u"(Evernote International|Evernote Food|Evernote Hello|Evernote Clearly|"
u"Evernote Business|Skitch|Evernote®?|Food|^Hello$|Clearly)"
)
shortcuts_regex = re.compile(u'(Ctrl\+\w$|Shift\+\w$|Alt\+\w$)')
shortcuts_modifier_regex = re.compile(u'(Ctrl\+$|Shift\+$|Alt\+$)')
hanging_symbols_regex = \
re.compile(u'(^[^\w\&]\s|\s[^\w\&]\s|\s[^\w\&]$|^[^\w\&]$)')
shortcuts_regex = re.compile(r'(Ctrl\+\w$|Shift\+\w$|Alt\+\w$)')
shortcuts_modifier_regex = re.compile(r'(Ctrl\+$|Shift\+$|Alt\+$)')
hanging_symbols_regex = re.compile(r'(^[^\w&]\s|\s[^\w&]\s|\s[^\w&]$|^[^\w&]$)')


def find_placeholders(aref, regex, cls=''):
Expand Down Expand Up @@ -126,7 +126,7 @@ def _count_words(aref):
s = chunk['string']
# Replace the date with just the month name (i.e. count as a single
# word)
s = english_date.sub(u'\g<1>\g<2>\g<3>', s)
s = english_date.sub(r'\g<1>\g<2>\g<3>', s)

s = remove.sub(u'', s)
s = delimiters_begin.sub(u'', s)
Expand Down

0 comments on commit 6637ada

Please sign in to comment.