Skip to content

Commit

Permalink
[3.13] gh-126807: pygettext: Do not attempt to extract messages from …
Browse files Browse the repository at this point in the history
…function definitions. (GH-126808) (GH-126846)

Fixes a bug where pygettext would attempt
to extract a message from code like this:

def _(x): pass

This is because pygettext only looks at one
token at a time and '_(x)' looks like a
function call.

However, since 'x' is not a string literal,
pygettext would erroneously issue a warning.
(cherry picked from commit 9a45638)

Co-authored-by: Tomas R <tomas.roun8@gmail.com>
  • Loading branch information
miss-islington and tomasr8 authored Nov 14, 2024
1 parent bf6fa21 commit bf40fdb
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 5 deletions.
33 changes: 28 additions & 5 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,23 @@ def assert_POT_equal(self, expected, actual):
self.maxDiff = None
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

def extract_docstrings_from_str(self, module_content):
""" utility: return all msgids extracted from module_content """
filename = 'test_docstrings.py'
with temp_cwd(None) as cwd:
def extract_from_str(self, module_content, *, args=(), strict=True):
"""Return all msgids extracted from module_content."""
filename = 'test.py'
with temp_cwd(None):
with open(filename, 'w', encoding='utf-8') as fp:
fp.write(module_content)
assert_python_ok('-Xutf8', self.script, '-D', filename)
res = assert_python_ok('-Xutf8', self.script, *args, filename)
if strict:
self.assertEqual(res.err, b'')
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
return self.get_msgids(data)

# Convenience wrapper around extract_from_str(): runs the script with the
# '--docstrings' option and passes strict=False, so the empty-stderr
# assertion that extract_from_str() performs in strict mode is skipped.
# The return value is whatever extract_from_str() returns (the msgids
# parsed from the generated messages.pot).
def extract_docstrings_from_str(self, module_content):
"""Return all docstrings extracted from module_content."""
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)

def test_header(self):
"""Make sure the required fields are in the header, according to:
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
Expand Down Expand Up @@ -344,6 +350,23 @@ def test_calls_in_fstring_with_partially_wrong_expression(self):
self.assertNotIn('foo', msgids)
self.assertIn('bar', msgids)

# Regression test for gh-126807: when `_` is the *name* of a function,
# async function or class, the definition must not be mistaken for a
# call to the `_` keyword.
def test_function_and_class_names(self):
"""Test that function and class names are not mistakenly extracted."""
# extract_from_str() is called with its default strict=True, so it also
# asserts that the script wrote nothing to stderr (i.e. no warnings).
# The `x="foo"` case has a string literal directly after `_(`.
msgids = self.extract_from_str(dedent('''\
def _(x):
pass
def _(x="foo"):
pass
async def _(x):
pass
class _(object):
pass
'''))
# Only the empty msgid may be present (presumably the POT header entry --
# confirm against get_msgids); nothing from the definitions is extracted.
self.assertEqual(msgids, [''])

def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix extraction warnings in :program:`pygettext.py` caused by mistaking
function definitions for function calls.
6 changes: 6 additions & 0 deletions Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,9 @@ def __waiting(self, ttype, tstring, lineno):
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__ignorenext
return
if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen
return
Expand Down Expand Up @@ -448,6 +451,9 @@ def __openseen(self, ttype, tstring, lineno):
}, file=sys.stderr)
self.__state = self.__waiting

# State handler that __waiting switches to right after a 'class' or 'def'
# NAME token. It does not inspect its arguments at all: the token it
# receives (presumably the name being defined -- handlers appear to be
# called once per token) is dropped and the state goes back to __waiting,
# so a definition such as `def _(x)` is never matched against the keywords.
def __ignorenext(self, ttype, tstring, lineno):
self.__state = self.__waiting

def __addentry(self, msg, lineno=None, isdocstring=0):
if lineno is None:
lineno = self.__lineno
Expand Down

0 comments on commit bf40fdb

Please sign in to comment.