Skip to content

Commit

Permalink
Update tar validation to also check for abspath and symlinks
Browse files Browse the repository at this point in the history
This takes the original PR and adds checks for symlink and absolute
paths.  This brings it more in line with the `data` arg for Python
3.12's `filter` arg does.
  • Loading branch information
jamesls committed May 28, 2024
1 parent 2f6e51e commit 09ec745
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 20 deletions.
5 changes: 5 additions & 0 deletions .changes/next-release/34727419579041-bugfix-tar-48521.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "bugfix",
"category": "tar",
"description": "Validate tar extraction does not escape destination dir (#1990)"
}
73 changes: 53 additions & 20 deletions chalice/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,26 +235,59 @@ def extract_zipfile(self, zipfile_path: str, unpack_dir: str) -> None:

def extract_tarfile(self, tarfile_path: str, unpack_dir: str) -> None:
with tarfile.open(tarfile_path, 'r:*') as tar:
def is_within_directory(directory, target):

abs_directory = os.path.abspath(directory)
abs_target = os.path.abspath(target)

prefix = os.path.commonprefix([abs_directory, abs_target])

return prefix == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):

for member in tar.getmembers():
member_path = os.path.join(path, member.name)
if not is_within_directory(path, member_path):
raise Exception("Attempted Path Traversal in Tar File")

tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(tar, unpack_dir)
# In Python 3.12+, there's a `filter` arg where passing a
# 'data' value will handle this behavior for us. To support older
# versions of Python we handle this ourselves. We can't hook
# into `extractall` directly so the idea is that we do a separate
# validation pass first to ensure there's no files that try
# to extract outside of the provided `unpack_dir`. This is roughly
# based off of what's done in the `data_filter()` in Python 3.12.
self._validate_safe_extract(tar, unpack_dir)
tar.extractall(unpack_dir)

def _validate_safe_extract(
self,
tar: tarfile.TarFile,
unpack_dir: str
) -> None:
for member in tar:
self._validate_single_tar_member(member, unpack_dir)

def _validate_single_tar_member(
self,
member: tarfile.TarInfo,
unpack_dir: str
) -> None:
name = member.name
dest_path = os.path.realpath(unpack_dir)
if name.startswith(('/', os.sep)):
name = member.path.lstrip('/' + os.sep)
if os.path.isabs(name):
raise RuntimeError(f"Absolute path in tarfile not allowed: {name}")
target_path = os.path.realpath(os.path.join(dest_path, name))
# Check we don't escape the destination dir, e.g `../../foo`
if os.path.commonpath([target_path, dest_path]) != dest_path:
raise RuntimeError(
f"Tar member outside destination dir: {target_path}")
# If we're dealing with a member that's some type of link, ensure
# it doesn't point to anything outside of the destination dir.
if member.islnk() or member.issym():
if os.path.abspath(member.linkname):
raise RuntimeError(f"Symlink to abspath: {member.linkname}")
if member.issym():
target_path = os.path.join(
dest_path,
os.path.dirname(name),
member.linkname,
)
else:
target_path = os.path.join(
dest_path,
member.linkname)
target_path = os.path.realpath(target_path)
if os.path.commonpath([target_path, dest_path]) != dest_path:
raise RuntimeError(
f"Symlink outside of dest dir: {target_path}")

def directory_exists(self, path: str) -> bool:
return os.path.isdir(path)
Expand Down
26 changes: 26 additions & 0 deletions tests/functional/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import os
import io
import tarfile

import pytest

Expand Down Expand Up @@ -108,6 +109,31 @@ def test_remove_stage_from_deployed_values_no_file(tmpdir):
assert not os.path.isfile(filename)


def test_error_raised_on_tar_out_of_extract_dir(tmp_path, osutils):
filepath = tmp_path / 'badfile'
filepath.write_text('single file')
badtarpath = tmp_path / 'badtar.tar.gz'
extractdir = tmp_path / 'nest1' / 'nest2' / 'nest3'
with tarfile.open(badtarpath, 'w:gz') as tar:
tar.add(filepath, arcname='../../escaped-dir.txt')
with pytest.raises(RuntimeError):
osutils.extract_tarfile(str(badtarpath), extractdir)


def test_error_raise_tar_symlink_out_of_extract_dir(tmp_path, osutils):
dir_with_symlink = tmp_path / 'nest1' / 'nest2'
dir_with_symlink.mkdir(parents=True, exist_ok=True)
outside_file = tmp_path / 'outside.txt'
outside_file.write_text('outside of dir')
symlink_file = dir_with_symlink / 'myfile.txt'
os.symlink(outside_file, symlink_file)
tarpath = dir_with_symlink / 'badtar.tar.gz'
with tarfile.open(tarpath, 'w:gz') as tar:
tar.add(symlink_file)
with pytest.raises(RuntimeError):
osutils.extract_tarfile(str(tarpath), dir_with_symlink)


class TestOSUtils(object):
def test_can_read_unicode(self, tmpdir, osutils):
filename = str(tmpdir.join('file.txt'))
Expand Down

0 comments on commit 09ec745

Please sign in to comment.