Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/1113 patch CVE-2007-4559 #1114

Merged
merged 4 commits into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@

Unreleased Changes
------------------
* General
* Fixed a possible path traversal vulnerability when working with datastack
archives. This patches CVE-2007-4559, reported to us by Trellix.
https://github.com/natcap/invest/issues/1113
* Habitat Quality
* All spatial inputs including the access vector and threat rasters are
now reprojected to the ``lulc_cur_path`` raster. This fixes a bug where
Expand Down
38 changes: 36 additions & 2 deletions src/natcap/invest/datastack.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,41 @@
'args model_name invest_version')


def _tarfile_safe_extract(archive_path, dest_dir_path):
"""Extract a tarfile in a safe way.

This function avoids the CVE-2007-4559 exploit that's been a vulnerability
in the python stdlib for at least 15 years now and should really be patched
upstream.

Args:
archive_path (string): The path to a tarfile, such as a datastack
archive created by InVEST.
dest_dir_path (string): The path to the destination directory, where
the contents should be unzipped.

Returns:
``None``
"""
# The guts of this function are taken from Trellix's PR to InVEST. See
# https://github.com/natcap/invest/pull/1099 for details.
with tarfile.open(archive_path) as tar:
def is_within_directory(directory, target):
abs_directory = os.path.abspath(directory)
abs_target = os.path.abspath(target)
prefix = os.path.commonprefix([abs_directory, abs_target])
return prefix == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
for member in tar.getmembers():
member_path = os.path.join(path, member.name)
if not is_within_directory(path, member_path):
raise Exception("Attempted Path Traversal in Tar File")
tar.extractall(path, members, numeric_owner=numeric_owner)

safe_extract(tar, dest_dir_path)


def _copy_spatial_files(spatial_filepath, target_dir):
"""Copy spatial files to a new directory.

Expand Down Expand Up @@ -451,8 +486,7 @@ def extract_datastack_archive(datastack_path, dest_dir_path):
LOGGER.info('Extracting archive %s to %s', datastack_path, dest_dir_path)
dest_dir_path = os.path.abspath(dest_dir_path)
# extract the archive to the workspace
with tarfile.open(datastack_path) as tar:
tar.extractall(dest_dir_path)
_tarfile_safe_extract(datastack_path, dest_dir_path)

# get the arguments dictionary
arguments_dict = json.load(open(
Expand Down
17 changes: 5 additions & 12 deletions tests/test_datastack.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ def test_collect_simple_parameters(self):
datastack.build_datastack_archive(
params, 'test_datastack_modules.simple_parameters', archive_path)
out_directory = os.path.join(self.workspace, 'extracted_archive')

with tarfile.open(archive_path) as tar:
tar.extractall(out_directory)
datastack._tarfile_safe_extract(archive_path, out_directory)

self.assertEqual(len(os.listdir(out_directory)), 3)

Expand Down Expand Up @@ -158,9 +156,7 @@ def test_collect_rasters(self):

# extract the archive
out_directory = os.path.join(self.workspace, 'extracted_archive')

with tarfile.open(archive_path) as tar:
tar.extractall(out_directory)
datastack._tarfile_safe_extract(archive_path, out_directory)

archived_params = json.load(
open(os.path.join(
Expand Down Expand Up @@ -202,8 +198,7 @@ def test_collect_vectors(self):

# extract the archive
out_directory = os.path.join(dest_dir, 'extracted_archive')
with tarfile.open(archive_path) as tar:
tar.extractall(out_directory)
datastack._tarfile_safe_extract(archive_path, out_directory)

archived_params = json.load(
open(os.path.join(
Expand Down Expand Up @@ -245,8 +240,7 @@ def test_nonspatial_files(self):

# extract the archive
out_directory = os.path.join(self.workspace, 'extracted_archive')
with tarfile.open(archive_path) as tar:
tar.extractall(out_directory)
datastack._tarfile_safe_extract(archive_path, out_directory)

archived_params = json.load(
open(os.path.join(out_directory,
Expand Down Expand Up @@ -283,8 +277,7 @@ def test_duplicate_filepaths(self):

# extract the archive
out_directory = os.path.join(self.workspace, 'extracted_archive')
with tarfile.open(archive_path) as tar:
tar.extractall(out_directory)
datastack._tarfile_safe_extract(archive_path, out_directory)

archived_params = json.load(
open(os.path.join(out_directory,
Expand Down