From 12c171f193b7b38329a75bbaecabca814831d5e5 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 6 May 2024 13:20:47 +0200 Subject: [PATCH] Use the ``data_filter`` when extracting tarballs, if it's available. (#12214) Previous behaviour is used on Python without PEP-720 tarfile filters. (Note that the feature is now in security releases of all supported versions.) A custom filter (which wraps `data_filter`) is used to retain pip-specific behaviour: - Removing a common leading directory - Setting the mode (Unix permissions) Compared to the previous behaviour, if a file can't be unpacked, the unpacking operation will fail with `InstallError`, rather than skipping the individual file with a `logger.warning`. This means that "some corrupt tar files" now can't be unpacked. Note that PEP 721 limits itself to sdists, this change affects unpacking any other tar file. --- news/12111.feature.rst | 1 + src/pip/_internal/utils/unpacking.py | 170 +++++++++++++++++++-------- tests/unit/test_utils_unpacking.py | 28 ++++- 3 files changed, 148 insertions(+), 51 deletions(-) create mode 100644 news/12111.feature.rst diff --git a/news/12111.feature.rst b/news/12111.feature.rst new file mode 100644 index 00000000000..eb03b76d826 --- /dev/null +++ b/news/12111.feature.rst @@ -0,0 +1 @@ +PEP 721: Use the ``data_filter`` when extracting tarballs, if it's available. diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 78b5c13ced3..341269550ce 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -5,6 +5,7 @@ import os import shutil import stat +import sys import tarfile import zipfile from typing import Iterable, List, Optional @@ -85,12 +86,16 @@ def is_within_directory(directory: str, target: str) -> bool: return prefix == abs_directory +def _get_default_mode_plus_executable() -> int: + return 0o777 & ~current_umask() | 0o111 + + def set_extracted_file_to_default_mode_plus_executable(path: str) -> None: """ Make file present at path have execute for user/group/world (chmod +x) is no-op on windows per python docs """ - os.chmod(path, (0o777 & ~current_umask() | 0o111)) + os.chmod(path, _get_default_mode_plus_executable()) def zip_item_is_executable(info: ZipInfo) -> bool: @@ -151,8 +156,8 @@ def untar_file(filename: str, location: str) -> None: Untar the file (with path `filename`) to the destination `location`. All files are written based on system defaults and umask (i.e. permissions are not preserved), except that regular file members with any execute - permissions (user, group, or world) have "chmod +x" applied after being - written. Note that for windows, any execute changes using os.chmod are + permissions (user, group, or world) have "chmod +x" applied on top of the + default. Note that for windows, any execute changes using os.chmod are no-ops per the python docs. """ ensure_dir(location) @@ -170,62 +175,127 @@ def untar_file(filename: str, location: str) -> None: filename, ) mode = "r:*" + tar = tarfile.open(filename, mode, encoding="utf-8") try: leading = has_leading_dir([member.name for member in tar.getmembers()]) - for member in tar.getmembers(): - fn = member.name - if leading: - fn = split_leading_dir(fn)[1] - path = os.path.join(location, fn) - if not is_within_directory(location, path): - message = ( - "The tar file ({}) has a file ({}) trying to install " - "outside target directory ({})" - ) - raise InstallationError(message.format(filename, path, location)) - if member.isdir(): - ensure_dir(path) - elif member.issym(): - try: - tar._extract_member(member, path) - except Exception as exc: - # Some corrupt tar files seem to produce this - # (specifically bad symlinks) - logger.warning( - "In the tar file %s the member %s is invalid: %s", - filename, - member.name, - exc, - ) - continue - else: + + # PEP 706 added `tarfile.data_filter`, and made some other changes to + # Python's tarfile module (see below). The features were backported to + # security releases. + try: + data_filter = tarfile.data_filter + except AttributeError: + _untar_without_filter(filename, location, tar, leading) + else: + default_mode_plus_executable = _get_default_mode_plus_executable() + + def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo: + if leading: + member.name = split_leading_dir(member.name)[1] + orig_mode = member.mode try: - fp = tar.extractfile(member) - except (KeyError, AttributeError) as exc: - # Some corrupt tar files seem to produce this - # (specifically bad symlinks) - logger.warning( - "In the tar file %s the member %s is invalid: %s", - filename, - member.name, - exc, + try: + member = data_filter(member, location) + except tarfile.LinkOutsideDestinationError: + if sys.version_info[:3] in { + (3, 8, 17), + (3, 9, 17), + (3, 10, 12), + (3, 11, 4), + }: + # The tarfile filter in specific Python versions + # raises LinkOutsideDestinationError on valid input + # (https://github.com/python/cpython/issues/107845) + # Ignore the error there, but do use the + # more lax `tar_filter` + member = tarfile.tar_filter(member, location) + else: + raise + except tarfile.TarError as exc: + message = "Invalid member in the tar file {}: {}" + # Filter error messages mention the member name. + # No need to add it here. + raise InstallationError( + message.format( + filename, + exc, + ) ) - continue - ensure_dir(os.path.dirname(path)) - assert fp is not None - with open(path, "wb") as destfp: - shutil.copyfileobj(fp, destfp) - fp.close() - # Update the timestamp (useful for cython compiled files) - tar.utime(member, path) - # member have any execute permissions for user/group/world? - if member.mode & 0o111: - set_extracted_file_to_default_mode_plus_executable(path) + if member.isfile() and orig_mode & 0o111: + member.mode = default_mode_plus_executable + else: + # See PEP 706 note above. + # The PEP changed this from `int` to `Optional[int]`, + # where None means "use the default". Mypy doesn't + # know this yet. + member.mode = None # type: ignore [assignment] + return member + + tar.extractall(location, filter=pip_filter) + finally: tar.close() +def _untar_without_filter( + filename: str, + location: str, + tar: tarfile.TarFile, + leading: bool, +) -> None: + """Fallback for Python without tarfile.data_filter""" + for member in tar.getmembers(): + fn = member.name + if leading: + fn = split_leading_dir(fn)[1] + path = os.path.join(location, fn) + if not is_within_directory(location, path): + message = ( + "The tar file ({}) has a file ({}) trying to install " + "outside target directory ({})" + ) + raise InstallationError(message.format(filename, path, location)) + if member.isdir(): + ensure_dir(path) + elif member.issym(): + try: + tar._extract_member(member, path) + except Exception as exc: + # Some corrupt tar files seem to produce this + # (specifically bad symlinks) + logger.warning( + "In the tar file %s the member %s is invalid: %s", + filename, + member.name, + exc, + ) + continue + else: + try: + fp = tar.extractfile(member) + except (KeyError, AttributeError) as exc: + # Some corrupt tar files seem to produce this + # (specifically bad symlinks) + logger.warning( + "In the tar file %s the member %s is invalid: %s", + filename, + member.name, + exc, + ) + continue + ensure_dir(os.path.dirname(path)) + assert fp is not None + with open(path, "wb") as destfp: + shutil.copyfileobj(fp, destfp) + fp.close() + # Update the timestamp (useful for cython compiled files) + tar.utime(member, path) + # member have any execute permissions for user/group/world? + if member.mode & 0o111: + set_extracted_file_to_default_mode_plus_executable(path) + + def unpack_file( filename: str, location: str, diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 1f0b59dbd6b..3fdd822e739 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -155,7 +155,13 @@ def test_unpack_tar_failure(self) -> None: test_tar = self.make_tar_file("test_tar.tar", files) with pytest.raises(InstallationError) as e: untar_file(test_tar, self.tempdir) - assert "trying to install outside target directory" in str(e.value) + + # The error message comes from tarfile.data_filter when it is available, + # otherwise from pip's own check. + if hasattr(tarfile, "data_filter"): + assert "is outside the destination" in str(e.value) + else: + assert "trying to install outside target directory" in str(e.value) def test_unpack_tar_success(self) -> None: """ @@ -171,6 +177,26 @@ def test_unpack_tar_success(self) -> None: test_tar = self.make_tar_file("test_tar.tar", files) untar_file(test_tar, self.tempdir) + @pytest.mark.skipif( + not hasattr(tarfile, "data_filter"), + reason="tarfile filters (PEP-721) not available", + ) + def test_unpack_tar_filter(self) -> None: + """ + Test that the tarfile.data_filter is used to disallow dangerous + behaviour (PEP-721) + """ + test_tar = os.path.join(self.tempdir, "test_tar_filter.tar") + with tarfile.open(test_tar, "w") as mytar: + file_tarinfo = tarfile.TarInfo("bad-link") + file_tarinfo.type = tarfile.SYMTYPE + file_tarinfo.linkname = "../../../../pwn" + mytar.addfile(file_tarinfo, io.BytesIO(b"")) + with pytest.raises(InstallationError) as e: + untar_file(test_tar, self.tempdir) + + assert "is outside the destination" in str(e.value) + def test_unpack_tar_unicode(tmpdir: Path) -> None: test_tar = tmpdir / "test.tar"