Skip to content

Commit

Permalink
Enclose white spaces in references (#1105)
Browse files Browse the repository at this point in the history
Since version 0.22, gettext wraps any file name in a reference that
contains spaces or tabs in a First Strong Isolate (U+2068) / Pop
Directional Isolate (U+2069) pair. This commit adds the same behavior to
Babel.
  • Loading branch information
Dunedan authored Sep 4, 2024
1 parent b42344d commit f91754b
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 2 deletions.
69 changes: 67 additions & 2 deletions babel/messages/pofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,50 @@ def denormalize(string: str) -> str:
return unescape(string)


def _extract_locations(line: str) -> list[str]:
"""Extract locations from location comments.
Locations are extracted while properly handling First Strong
Isolate (U+2068) and Pop Directional Isolate (U+2069), used by
gettext to enclose filenames with spaces and tabs in their names.
"""
if "\u2068" not in line and "\u2069" not in line:
return line.lstrip().split()

locations = []
location = ""
in_filename = False
for c in line:
if c == "\u2068":
if in_filename:
raise ValueError("location comment contains more First Strong Isolate "
"characters, than Pop Directional Isolate characters")
in_filename = True
continue
elif c == "\u2069":
if not in_filename:
raise ValueError("location comment contains more Pop Directional Isolate "
"characters, than First Strong Isolate characters")
in_filename = False
continue
elif c == " ":
if in_filename:
location += c
elif location:
locations.append(location)
location = ""
else:
location += c
else:
if location:
if in_filename:
raise ValueError("location comment contains more First Strong Isolate "
"characters, than Pop Directional Isolate characters")
locations.append(location)

return locations


class PoFileError(Exception):
"""Exception thrown by PoParser when an invalid po file is encountered."""

Expand Down Expand Up @@ -269,7 +313,7 @@ def _process_comment(self, line) -> None:
self._finish_current_message()

if line[1:].startswith(':'):
for location in line[2:].lstrip().split():
for location in _extract_locations(line[2:]):
pos = location.rfind(':')
if pos >= 0:
try:
Expand Down Expand Up @@ -307,7 +351,10 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
if line[1:].startswith('~'):
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
else:
self._process_comment(line)
try:
self._process_comment(line)
except ValueError as exc:
self._invalid_pofile(line, lineno, str(exc))
else:
self._process_message_line(lineno, line)

Expand Down Expand Up @@ -474,6 +521,23 @@ def normalize(string: str, prefix: str = '', width: int = 76) -> str:
return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])


def _enclose_filename_if_necessary(filename: str) -> str:
"""Enclose filenames which include white spaces or tabs.
Do the same as gettext and enclose filenames which contain white
spaces or tabs with First Strong Isolate (U+2068) and Pop
Directional Isolate (U+2069).
"""
if " " not in filename and "\t" not in filename:
return filename

if not filename.startswith("\u2068"):
filename = "\u2068" + filename
if not filename.endswith("\u2069"):
filename += "\u2069"
return filename


def write_po(
fileobj: SupportsWrite[bytes],
catalog: Catalog,
Expand Down Expand Up @@ -626,6 +690,7 @@ def _format_message(message, prefix=''):

for filename, lineno in locations:
location = filename.replace(os.sep, '/')
location = _enclose_filename_if_necessary(location)
if lineno and include_lineno:
location = f"{location}:{lineno:d}"
if location not in locs:
Expand Down
112 changes: 112 additions & 0 deletions tests/messages/test_pofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from babel.core import Locale
from babel.messages import pofile
from babel.messages.catalog import Catalog, Message
from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations
from babel.util import FixedOffsetTimezone


Expand Down Expand Up @@ -438,6 +439,19 @@ def test_missing_plural_in_the_middle(self):
assert message.string[1] == ''
assert message.string[2] == 'Vohs [text]'

def test_with_location(self):
    # A location comment mixing a plain filename with one enclosed in
    # U+2068/U+2069 must be read back as two locations, the enclosed one
    # keeping its spaces and losing the isolate characters.
    po_source = StringIO('''\
#: main.py:1 \u2068filename with whitespace.py\u2069:123
msgid "foo"
msgstr "bar"
''')
    parsed = pofile.read_po(po_source, locale='de_DE')
    assert len(parsed) == 1
    entry = parsed['foo']
    assert entry.string == 'bar'
    expected_locations = [("main.py", 1), ("filename with whitespace.py", 123)]
    assert entry.locations == expected_locations


def test_abort_invalid_po_file(self):
invalid_po = '''
msgctxt ""
Expand Down Expand Up @@ -841,6 +855,59 @@ def test_no_include_lineno(self):
msgid "foo"
msgstr ""'''

def test_white_space_in_location(self):
    # A filename with a space is written enclosed in U+2068/U+2069,
    # which are the bytes e2 81 a8 / e2 81 a9 in UTF-8.
    cat = Catalog()
    for path, line_no in (('main.py', 1), ('utils b.py', 3)):
        cat.add('foo', locations=[(path, line_no)])
    out = BytesIO()
    pofile.write_po(out, cat, omit_header=True, include_lineno=True)
    written = out.getvalue().strip()
    assert written == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''

def test_white_space_in_location_already_enclosed(self):
    # A filename that already carries both isolate characters must not
    # be enclosed a second time when written.
    cat = Catalog()
    for path, line_no in (('main.py', 1), ('\u2068utils b.py\u2069', 3)):
        cat.add('foo', locations=[(path, line_no)])
    out = BytesIO()
    pofile.write_po(out, cat, omit_header=True, include_lineno=True)
    written = out.getvalue().strip()
    assert written == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''

def test_tab_in_location(self):
    # A filename containing a tab is written enclosed in U+2068/U+2069.
    # NOTE(review): the expected bytes below show a single space between
    # "utils" and "b.py" although the input filename has a tab; confirm the
    # literal against the repository, its whitespace may not be verbatim.
    cat = Catalog()
    for path, line_no in (('main.py', 1), ('utils\tb.py', 3)):
        cat.add('foo', locations=[(path, line_no)])
    out = BytesIO()
    pofile.write_po(out, cat, omit_header=True, include_lineno=True)
    written = out.getvalue().strip()
    assert written == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''

def test_tab_in_location_already_enclosed(self):
    # A tab-containing filename that already carries both isolate
    # characters must not be enclosed a second time when written.
    # NOTE(review): the expected bytes below show a single space between
    # "utils" and "b.py" although the input filename has a tab; confirm the
    # literal against the repository, its whitespace may not be verbatim.
    cat = Catalog()
    for path, line_no in (('main.py', 1), ('\u2068utils\tb.py\u2069', 3)):
        cat.add('foo', locations=[(path, line_no)])
    out = BytesIO()
    pofile.write_po(out, cat, omit_header=True, include_lineno=True)
    written = out.getvalue().strip()
    assert written == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''


class RoundtripPoTestCase(unittest.TestCase):
    """Round trips of a catalog through ``write_po`` and ``read_po``."""

    def test_enclosed_filenames_in_location_comment(self):
        # Filenames with spaces are enclosed on write; reading the result
        # back must yield a catalog identical to the one written.
        original = Catalog()
        entries = (
            ("foo", 2, [("main 1.py", 1)]),
            ("bar", 6, [("other.py", 2)]),
            ("baz", 10, [("main 1.py", 3), ("other.py", 4)]),
        )
        for msgid, lineno, locations in entries:
            original.add(msgid, lineno=lineno, locations=locations, string="")

        stream = BytesIO()
        pofile.write_po(stream, original, omit_header=True, include_lineno=True)
        stream.seek(0)
        reread = pofile.read_po(stream)
        assert original.is_identical(reread) is True

class PofileFunctionsTestCase(unittest.TestCase):

Expand All @@ -864,6 +931,51 @@ def test_denormalize_on_msgstr_without_empty_first_line(self):
assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}')


@pytest.mark.parametrize(
    "line, locations",
    [
        ("\u2068file1.po\u2069", ["file1.po"]),
        (
            "file1.po \u2068file 2.po\u2069 file3.po",
            ["file1.po", "file 2.po", "file3.po"],
        ),
        (
            "file1.po:1 \u2068file 2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file 2.po:2", "file3.po:3"],
        ),
        (
            "\u2068file1.po\u2069:1 \u2068file\t2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file\t2.po:2", "file3.po:3"],
        ),
        ("file1.po file2.po", ["file1.po", "file2.po"]),
        ("file1.po \u2068\u2069 file2.po", ["file1.po", "file2.po"]),
    ],
)
def test_extract_locations_valid_location_comment(line, locations):
    # Well-formed location comments: the isolate characters are dropped
    # and whitespace inside an isolate pair stays part of the location.
    extracted = _extract_locations(line)
    assert extracted == locations


@pytest.mark.parametrize(
    "line",
    [
        "\u2068file 1.po",
        "file 1.po\u2069",
        "\u2069file 1.po\u2068",
        "\u2068file 1.po:1 \u2068file 2.po\u2069:2",
        "\u2068file 1.po\u2069:1 file 2.po\u2069:2",
    ],
)
def test_extract_locations_invalid_location_comment(line):
    # Unbalanced or nested isolate characters must be rejected.
    with pytest.raises(ValueError):
        _extract_locations(line)


@pytest.mark.parametrize(
    "filename",
    [
        "file.po",
        "file_a.po",
        "file-a.po",
        "file\n.po",
        "\u2068file.po\u2069",
        "\u2068file a.po\u2069",
    ],
)
def test_enclose_filename_if_necessary_no_change(filename):
    # Filenames without a space or tab, and filenames that are already
    # enclosed, come back unchanged.
    result = _enclose_filename_if_necessary(filename)
    assert result == filename


@pytest.mark.parametrize(
    "filename",
    [
        "file a.po",
        "file\ta.po",
    ],
)
def test_enclose_filename_if_necessary_enclosed(filename):
    # A bare filename with a space or tab gains both isolate characters.
    expected = f"\u2068{filename}\u2069"
    assert _enclose_filename_if_necessary(filename) == expected


def test_unknown_language_roundtrip():
buf = StringIO(r'''
msgid ""
Expand Down

0 comments on commit f91754b

Please sign in to comment.