Skip to content

Commit

Permalink
Use the data_filter when extracting tarballs, if it's available. (#…
Browse files Browse the repository at this point in the history
…12214)

Previous behaviour is used on Python without PEP-720 tarfile filters. (Note that the
feature is now in security releases of all supported versions.)

A custom filter (which wraps `data_filter`) is used to retain pip-specific behaviour:
- Removing a common leading directory
- Setting the mode (Unix permissions)

Compared to the previous behaviour, if a file can't be unpacked, the unpacking operation
will fail with `InstallError`, rather than skipping the individual file with
a `logger.warning`. This means that "some corrupt tar files" now can't be unpacked.

Note that PEP 721 limits itself to sdists, this change affects unpacking any
other tar file.
  • Loading branch information
encukou authored May 6, 2024
1 parent 71a08a7 commit 12c171f
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 51 deletions.
1 change: 1 addition & 0 deletions news/12111.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PEP 721: Use the ``data_filter`` when extracting tarballs, if it's available.
170 changes: 120 additions & 50 deletions src/pip/_internal/utils/unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import shutil
import stat
import sys
import tarfile
import zipfile
from typing import Iterable, List, Optional
Expand Down Expand Up @@ -85,12 +86,16 @@ def is_within_directory(directory: str, target: str) -> bool:
return prefix == abs_directory


def _get_default_mode_plus_executable() -> int:
return 0o777 & ~current_umask() | 0o111


def set_extracted_file_to_default_mode_plus_executable(path: str) -> None:
"""
Make file present at path have execute for user/group/world
(chmod +x) is no-op on windows per python docs
"""
os.chmod(path, (0o777 & ~current_umask() | 0o111))
os.chmod(path, _get_default_mode_plus_executable())


def zip_item_is_executable(info: ZipInfo) -> bool:
Expand Down Expand Up @@ -151,8 +156,8 @@ def untar_file(filename: str, location: str) -> None:
Untar the file (with path `filename`) to the destination `location`.
All files are written based on system defaults and umask (i.e. permissions
are not preserved), except that regular file members with any execute
permissions (user, group, or world) have "chmod +x" applied after being
written. Note that for windows, any execute changes using os.chmod are
permissions (user, group, or world) have "chmod +x" applied on top of the
default. Note that for windows, any execute changes using os.chmod are
no-ops per the python docs.
"""
ensure_dir(location)
Expand All @@ -170,62 +175,127 @@ def untar_file(filename: str, location: str) -> None:
filename,
)
mode = "r:*"

tar = tarfile.open(filename, mode, encoding="utf-8")
try:
leading = has_leading_dir([member.name for member in tar.getmembers()])
for member in tar.getmembers():
fn = member.name
if leading:
fn = split_leading_dir(fn)[1]
path = os.path.join(location, fn)
if not is_within_directory(location, path):
message = (
"The tar file ({}) has a file ({}) trying to install "
"outside target directory ({})"
)
raise InstallationError(message.format(filename, path, location))
if member.isdir():
ensure_dir(path)
elif member.issym():
try:
tar._extract_member(member, path)
except Exception as exc:
# Some corrupt tar files seem to produce this
# (specifically bad symlinks)
logger.warning(
"In the tar file %s the member %s is invalid: %s",
filename,
member.name,
exc,
)
continue
else:

# PEP 706 added `tarfile.data_filter`, and made some other changes to
# Python's tarfile module (see below). The features were backported to
# security releases.
try:
data_filter = tarfile.data_filter
except AttributeError:
_untar_without_filter(filename, location, tar, leading)
else:
default_mode_plus_executable = _get_default_mode_plus_executable()

def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo:
if leading:
member.name = split_leading_dir(member.name)[1]
orig_mode = member.mode
try:
fp = tar.extractfile(member)
except (KeyError, AttributeError) as exc:
# Some corrupt tar files seem to produce this
# (specifically bad symlinks)
logger.warning(
"In the tar file %s the member %s is invalid: %s",
filename,
member.name,
exc,
try:
member = data_filter(member, location)
except tarfile.LinkOutsideDestinationError:
if sys.version_info[:3] in {
(3, 8, 17),
(3, 9, 17),
(3, 10, 12),
(3, 11, 4),
}:
# The tarfile filter in specific Python versions
# raises LinkOutsideDestinationError on valid input
# (https://github.com/python/cpython/issues/107845)
# Ignore the error there, but do use the
# more lax `tar_filter`
member = tarfile.tar_filter(member, location)
else:
raise
except tarfile.TarError as exc:
message = "Invalid member in the tar file {}: {}"
# Filter error messages mention the member name.
# No need to add it here.
raise InstallationError(
message.format(
filename,
exc,
)
)
continue
ensure_dir(os.path.dirname(path))
assert fp is not None
with open(path, "wb") as destfp:
shutil.copyfileobj(fp, destfp)
fp.close()
# Update the timestamp (useful for cython compiled files)
tar.utime(member, path)
# member have any execute permissions for user/group/world?
if member.mode & 0o111:
set_extracted_file_to_default_mode_plus_executable(path)
if member.isfile() and orig_mode & 0o111:
member.mode = default_mode_plus_executable
else:
# See PEP 706 note above.
# The PEP changed this from `int` to `Optional[int]`,
# where None means "use the default". Mypy doesn't
# know this yet.
member.mode = None # type: ignore [assignment]
return member

tar.extractall(location, filter=pip_filter)

finally:
tar.close()


def _untar_without_filter(
filename: str,
location: str,
tar: tarfile.TarFile,
leading: bool,
) -> None:
"""Fallback for Python without tarfile.data_filter"""
for member in tar.getmembers():
fn = member.name
if leading:
fn = split_leading_dir(fn)[1]
path = os.path.join(location, fn)
if not is_within_directory(location, path):
message = (
"The tar file ({}) has a file ({}) trying to install "
"outside target directory ({})"
)
raise InstallationError(message.format(filename, path, location))
if member.isdir():
ensure_dir(path)
elif member.issym():
try:
tar._extract_member(member, path)
except Exception as exc:
# Some corrupt tar files seem to produce this
# (specifically bad symlinks)
logger.warning(
"In the tar file %s the member %s is invalid: %s",
filename,
member.name,
exc,
)
continue
else:
try:
fp = tar.extractfile(member)
except (KeyError, AttributeError) as exc:
# Some corrupt tar files seem to produce this
# (specifically bad symlinks)
logger.warning(
"In the tar file %s the member %s is invalid: %s",
filename,
member.name,
exc,
)
continue
ensure_dir(os.path.dirname(path))
assert fp is not None
with open(path, "wb") as destfp:
shutil.copyfileobj(fp, destfp)
fp.close()
# Update the timestamp (useful for cython compiled files)
tar.utime(member, path)
# member have any execute permissions for user/group/world?
if member.mode & 0o111:
set_extracted_file_to_default_mode_plus_executable(path)


def unpack_file(
filename: str,
location: str,
Expand Down
28 changes: 27 additions & 1 deletion tests/unit/test_utils_unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,13 @@ def test_unpack_tar_failure(self) -> None:
test_tar = self.make_tar_file("test_tar.tar", files)
with pytest.raises(InstallationError) as e:
untar_file(test_tar, self.tempdir)
assert "trying to install outside target directory" in str(e.value)

# The error message comes from tarfile.data_filter when it is available,
# otherwise from pip's own check.
if hasattr(tarfile, "data_filter"):
assert "is outside the destination" in str(e.value)
else:
assert "trying to install outside target directory" in str(e.value)

def test_unpack_tar_success(self) -> None:
"""
Expand All @@ -171,6 +177,26 @@ def test_unpack_tar_success(self) -> None:
test_tar = self.make_tar_file("test_tar.tar", files)
untar_file(test_tar, self.tempdir)

@pytest.mark.skipif(
not hasattr(tarfile, "data_filter"),
reason="tarfile filters (PEP-721) not available",
)
def test_unpack_tar_filter(self) -> None:
"""
Test that the tarfile.data_filter is used to disallow dangerous
behaviour (PEP-721)
"""
test_tar = os.path.join(self.tempdir, "test_tar_filter.tar")
with tarfile.open(test_tar, "w") as mytar:
file_tarinfo = tarfile.TarInfo("bad-link")
file_tarinfo.type = tarfile.SYMTYPE
file_tarinfo.linkname = "../../../../pwn"
mytar.addfile(file_tarinfo, io.BytesIO(b""))
with pytest.raises(InstallationError) as e:
untar_file(test_tar, self.tempdir)

assert "is outside the destination" in str(e.value)


def test_unpack_tar_unicode(tmpdir: Path) -> None:
test_tar = tmpdir / "test.tar"
Expand Down

0 comments on commit 12c171f

Please sign in to comment.