From c74d0dce9cef0215a79ba4051e7f46ca99e3899a Mon Sep 17 00:00:00 2001 From: William Nelson Date: Thu, 18 Jul 2024 20:43:28 -0700 Subject: [PATCH 1/9] gh-121999: Change default tarfile filter to 'data' --- Doc/library/tarfile.rst | 41 +++++++++---------- Lib/tarfile.py | 8 +--- Lib/test/test_tarfile.py | 34 --------------- ...-07-18-21-19-04.gh-issue-121999.8IBbTK.rst | 1 + 4 files changed, 21 insertions(+), 63 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 5b624f3533136f..b5abe656b252fd 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -40,9 +40,11 @@ Some facts and figures: Archives are extracted using a :ref:`filter `, which makes it possible to either limit surprising/dangerous features, or to acknowledge that they are expected and the archive is fully trusted. - By default, archives are fully trusted, but this default is deprecated - and slated to change in Python 3.14. +.. versionchanged:: 3.14 + The default extraction filter was 'fully trusted' but is now 'data' which + which disallows dangerous features like links to absolute paths or paths + outside the destination. .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) @@ -495,19 +497,23 @@ be finalized; only the internally used file object will be closed. See the The *filter* argument specifies how ``members`` are modified or rejected before extraction. See :ref:`tarfile-extraction-filter` for details. - It is recommended to set this explicitly depending on which *tar* features - you need to support. + It is recommended to set this explicitly only if unusual *tar* features + are required. .. warning:: - Never extract archives from untrusted sources without prior inspection. 
+ The default filter is set to ``filter='data'`` to prevent the most + dangerous security issues, read the :ref:`tarfile-extraction-filter` + section for details. + + Never extract archives from untrusted sources without prior inspection, + even when using the ``'data'`` filter, but especially if using the + ``'tar'`` or ``'fully_trusted'`` filters. + It is possible that files are created outside of *path*, e.g. members that have absolute filenames starting with ``"/"`` or filenames with two dots ``".."``. - Set ``filter='data'`` to prevent the most dangerous security issues, - and read the :ref:`tarfile-extraction-filter` section for details. - .. versionchanged:: 3.5 Added the *numeric_owner* parameter. @@ -538,8 +544,9 @@ be finalized; only the internally used file object will be closed. See the See the warning for :meth:`extractall`. - Set ``filter='data'`` to prevent the most dangerous security issues, - and read the :ref:`tarfile-extraction-filter` section for details. + The default filter is set to ``filter='data'`` to prevent the most + dangerous security issues, read the :ref:`tarfile-extraction-filter` + section for details. .. versionchanged:: 3.2 Added the *set_attrs* parameter. @@ -603,12 +610,7 @@ be finalized; only the internally used file object will be closed. See the argument to :meth:`~TarFile.extract`. If ``extraction_filter`` is ``None`` (the default), - calling an extraction method without a *filter* argument will raise a - ``DeprecationWarning``, - and fall back to the :func:`fully_trusted <fully_trusted_filter>` filter, - whose dangerous behavior matches previous versions of Python. - - In Python 3.14+, leaving ``extraction_filter=None`` will cause + calling an extraction method without a *filter* argument will cause extraction methods to use the :func:`data <data_filter>` filter by default. The attribute may be set on instances or overridden in subclasses. @@ -992,12 +994,7 @@ can be: * ``None`` (default): Use :attr:`TarFile.extraction_filter`. 
- If that is also ``None`` (the default), raise a ``DeprecationWarning``, - and fall back to the ``'fully_trusted'`` filter, whose dangerous behavior - matches previous versions of Python. - - In Python 3.14, the ``'data'`` filter will become the default instead. - It's possible to switch earlier; see :attr:`TarFile.extraction_filter`. + If that is also ``None`` (the default), the ``'data'`` filter will be used. * A callable which will be called for each extracted member with a :ref:`TarInfo ` describing the member and the destination diff --git a/Lib/tarfile.py b/Lib/tarfile.py index d5d8a469779f50..4fa7bb6740adbb 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2248,13 +2248,7 @@ def _get_filter_function(self, filter): if filter is None: filter = self.extraction_filter if filter is None: - import warnings - warnings.warn( - 'Python 3.14 will, by default, filter extracted tar ' - + 'archives and reject files or modify their metadata. ' - + 'Use the filter argument to control this behavior.', - DeprecationWarning, stacklevel=3) - return fully_trusted_filter + return data_filter if isinstance(filter, str): raise TypeError( 'String names are not supported for ' diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index f715940de1d584..94ce2391680752 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -738,31 +738,6 @@ def test_extract_directory(self): finally: os_helper.rmtree(DIR) - def test_deprecation_if_no_filter_passed_to_extractall(self): - DIR = pathlib.Path(TEMPDIR) / "extractall" - with ( - os_helper.temp_dir(DIR), - tarfile.open(tarname, encoding="iso8859-1") as tar - ): - directories = [t for t in tar if t.isdir()] - with self.assertWarnsRegex(DeprecationWarning, "Use the filter argument") as cm: - tar.extractall(DIR, directories) - # check that the stacklevel of the deprecation warning is correct: - self.assertEqual(cm.filename, __file__) - - def test_deprecation_if_no_filter_passed_to_extract(self): - dirtype = 
"ustar/dirtype" - DIR = pathlib.Path(TEMPDIR) / "extractall" - with ( - os_helper.temp_dir(DIR), - tarfile.open(tarname, encoding="iso8859-1") as tar - ): - tarinfo = tar.getmember(dirtype) - with self.assertWarnsRegex(DeprecationWarning, "Use the filter argument") as cm: - tar.extract(tarinfo, path=DIR) - # check that the stacklevel of the deprecation warning is correct: - self.assertEqual(cm.filename, __file__) - def test_extractall_pathlike_dir(self): DIR = os.path.join(TEMPDIR, "extractall") with os_helper.temp_dir(DIR), \ @@ -4011,15 +3986,6 @@ def test_data_filter(self): self.assertIs(filtered.name, tarinfo.name) self.assertIs(filtered.type, tarinfo.type) - def test_default_filter_warns(self): - """Ensure the default filter warns""" - with ArchiveMaker() as arc: - arc.add('foo') - with warnings_helper.check_warnings( - ('Python 3.14', DeprecationWarning)): - with self.check_context(arc.open(), None): - self.expect_file('foo') - def test_change_default_filter_on_instance(self): tar = tarfile.TarFile(tarname, 'r') def strict_filter(tarinfo, path): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst new file mode 100644 index 00000000000000..6b6bcdc7aa1fa5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst @@ -0,0 +1 @@ +Update tarfile library to use 'data' filter by default when extracting From 12f183ad83b664ac47901ef01cb463c34c14dd2b Mon Sep 17 00:00:00 2001 From: William Nelson Date: Thu, 18 Jul 2024 22:25:39 -0700 Subject: [PATCH 2/9] Update shutil docs and tests --- Doc/library/shutil.rst | 9 +++++---- Lib/test/test_shutil.py | 3 --- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index fd32479195eca8..85830084fe1e69 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -706,17 +706,18 @@ provided. 
They rely on the :mod:`zipfile` and :mod:`tarfile` modules. The keyword-only *filter* argument is passed to the underlying unpacking function. For zip files, *filter* is not accepted. - For tar files, it is recommended to set it to ``'data'``, + For tar files, it is recommended to use the default, ``'data'``, unless using features specific to tar and UNIX-like filesystems. (See :ref:`tarfile-extraction-filter` for details.) - The ``'data'`` filter will become the default for tar files - in Python 3.14. .. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive .. warning:: - Never extract archives from untrusted sources without prior inspection. + Never extract archives from untrusted sources without prior inspection, + even when using the ``'data'`` filter, but especially if using the + ``'tar'`` or ``'fully_trusted'`` filters. + It is possible that files are created outside of the path specified in the *extract_dir* argument, e.g. members that have absolute filenames starting with "/" or filenames with two dots "..". 
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index c458c5df32572b..c770be21b41c2b 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2145,9 +2145,6 @@ def check_unpack_archive_with_converter(self, format, converter, **kwargs): def check_unpack_tarball(self, format): self.check_unpack_archive(format, filter='fully_trusted') self.check_unpack_archive(format, filter='data') - with warnings_helper.check_warnings( - ('Python 3.14', DeprecationWarning)): - self.check_unpack_archive(format) def test_unpack_archive_tar(self): self.check_unpack_tarball('tar') From 2d67e8e52d444b146f2df5d1078737c7e66c2776 Mon Sep 17 00:00:00 2001 From: WilliamRoyNelson Date: Fri, 19 Jul 2024 08:37:04 -0700 Subject: [PATCH 3/9] Update Doc/library/tarfile.rst Co-authored-by: Tomas R --- Doc/library/tarfile.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index b5abe656b252fd..59c721fe875ec2 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -42,7 +42,7 @@ Some facts and figures: or to acknowledge that they are expected and the archive is fully trusted. .. versionchanged:: 3.14 - The default extraction filter was 'fully trusted' but is now 'data' which + The default extraction filter was :func:`fully_trusted ` but is now :func:`data ` which which disallows dangerous features like links to absolute paths or paths outside the destination. 
From 15216b9aa19aeb3d316c416b64139637bb54dd75 Mon Sep 17 00:00:00 2001 From: Scott Odle Date: Fri, 19 Jul 2024 21:12:31 -0600 Subject: [PATCH 4/9] Add test for default filter of tarfile.extractall --- Lib/test/test_tarfile.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 94ce2391680752..a25c51c5d0a2bf 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -722,6 +722,28 @@ def format_mtime(mtime): tar.close() os_helper.rmtree(DIR) + @staticmethod + @unittest.mock.patch("tarfile.data_filter", wraps=tarfile.data_filter) + @unittest.mock.patch("tarfile.tar_filter", wraps=tarfile.tar_filter) + @unittest.mock.patch("tarfile.fully_trusted_filter", wraps=tarfile.fully_trusted_filter) + def test_extractall_default_filter(mock_ft_filter: unittest.mock.Mock, + mock_tar_filter: unittest.mock.Mock, + mock_data_filter: unittest.mock.Mock): + tar = tarfile.open(tarname, encoding="iso8859-1") + DIR = os.path.join(TEMPDIR, "extractall_default_filter") + os.mkdir(DIR) + try: + directories = [t for t in tar if t.isdir()] + tar.extractall(DIR, directories) + + # Test that the default filter is now "data", and the other filter types are not used. 
+ mock_data_filter.assert_called() + mock_ft_filter.assert_not_called() + mock_tar_filter.assert_not_called() + finally: + tar.close() + os_helper.rmtree(DIR) + @os_helper.skip_unless_working_chmod def test_extract_directory(self): dirtype = "ustar/dirtype" From 4571846be4243c84e6c1a0d135591b882139e9c4 Mon Sep 17 00:00:00 2001 From: Scott Odle Date: Sun, 21 Jul 2024 12:06:33 -0600 Subject: [PATCH 5/9] Update tarfile tests to use context managers --- Lib/test/test_tarfile.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index a25c51c5d0a2bf..3323c81e03af33 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -694,10 +694,11 @@ def test_extract_hardlink(self): def test_extractall(self): # Test if extractall() correctly restores directory permissions # and times (see issue1735). - tar = tarfile.open(tarname, encoding="iso8859-1") - DIR = os.path.join(TEMPDIR, "extractall") - os.mkdir(DIR) - try: + DIR = pathlib.Path(TEMPDIR) / "extractall" + with ( + os_helper.temp_dir(DIR), + tarfile.open(tarname, encoding="iso8859-1") as tar + ): directories = [t for t in tar if t.isdir()] tar.extractall(DIR, directories, filter='fully_trusted') for tarinfo in directories: @@ -718,31 +719,24 @@ def format_mtime(mtime): format_mtime(file_mtime), path) self.assertEqual(tarinfo.mtime, file_mtime, errmsg) - finally: - tar.close() - os_helper.rmtree(DIR) @staticmethod - @unittest.mock.patch("tarfile.data_filter", wraps=tarfile.data_filter) - @unittest.mock.patch("tarfile.tar_filter", wraps=tarfile.tar_filter) - @unittest.mock.patch("tarfile.fully_trusted_filter", wraps=tarfile.fully_trusted_filter) - def test_extractall_default_filter(mock_ft_filter: unittest.mock.Mock, - mock_tar_filter: unittest.mock.Mock, - mock_data_filter: unittest.mock.Mock): - tar = tarfile.open(tarname, encoding="iso8859-1") - DIR = os.path.join(TEMPDIR, "extractall_default_filter") - 
os.mkdir(DIR) - try: + def test_extractall_default_filter(): + # Test that the default filter is now "data", and the other filter types are not used. + DIR = pathlib.Path(TEMPDIR) / "extractall_default_filter" + with ( + os_helper.temp_dir(DIR), + tarfile.open(tarname, encoding="iso8859-1") as tar, + unittest.mock.patch("tarfile.data_filter", wraps=tarfile.data_filter) as mock_data_filter, + unittest.mock.patch("tarfile.tar_filter", wraps=tarfile.tar_filter) as mock_tar_filter, + unittest.mock.patch("tarfile.fully_trusted_filter", wraps=tarfile.fully_trusted_filter) as mock_ft_filter + ): directories = [t for t in tar if t.isdir()] tar.extractall(DIR, directories) - # Test that the default filter is now "data", and the other filter types are not used. mock_data_filter.assert_called() mock_ft_filter.assert_not_called() mock_tar_filter.assert_not_called() - finally: - tar.close() - os_helper.rmtree(DIR) @os_helper.skip_unless_working_chmod def test_extract_directory(self): From 136bc7e97acc8b67e59823652fcbfee80df8b1c4 Mon Sep 17 00:00:00 2001 From: WilliamRoyNelson Date: Sun, 21 Jul 2024 15:34:32 -0700 Subject: [PATCH 6/9] Improved phrasing for documentation of changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/tarfile.rst | 11 ++++++----- .../2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 59c721fe875ec2..dde91f93b81f64 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -42,9 +42,10 @@ Some facts and figures: or to acknowledge that they are expected and the archive is fully trusted. .. 
versionchanged:: 3.14 - The default extraction filter was :func:`fully_trusted ` but is now :func:`data ` which - which disallows dangerous features like links to absolute paths or paths - outside the destination. + Set the default extraction filter to :func:`data `, + which disallows dangerous features such as links to absolute paths + or paths outside of the destination. Previously, the filter strategy + was :func:`fully_trusted `. .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) @@ -503,7 +504,7 @@ be finalized; only the internally used file object will be closed. See the .. warning:: The default filter is set to ``filter='data'`` to prevent the most - dangerous security issues, read the :ref:`tarfile-extraction-filter` + dangerous security issues. Read the :ref:`tarfile-extraction-filter` section for details. Never extract archives from untrusted sources without prior inspection, @@ -545,7 +546,7 @@ be finalized; only the internally used file object will be closed. See the See the warning for :meth:`extractall`. The default filter is set to ``filter='data'`` to prevent the most - dangerous security issues, read the :ref:`tarfile-extraction-filter` + dangerous security issues. Read the :ref:`tarfile-extraction-filter` section for details. .. versionchanged:: 3.2 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst index 6b6bcdc7aa1fa5..e65aa993566446 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-18-21-19-04.gh-issue-121999.8IBbTK.rst @@ -1 +1,2 @@ -Update tarfile library to use 'data' filter by default when extracting +The default extraction filter for the :mod:`tarfile` module is now +set to :func:`'data' `. 
From 57c60e7287823fd34d8e1365e40c63d34ca2fb40 Mon Sep 17 00:00:00 2001 From: William Nelson Date: Sun, 21 Jul 2024 15:59:58 -0700 Subject: [PATCH 7/9] Remove unnecessarily wordy phrasing in warning about tarfiles. --- Doc/library/shutil.rst | 9 +++++---- Doc/library/tarfile.rst | 13 +++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 85830084fe1e69..2643ef4ecf15eb 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -714,14 +714,15 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. .. warning:: - Never extract archives from untrusted sources without prior inspection, - even when using the ``'data'`` filter, but especially if using the - ``'tar'`` or ``'fully_trusted'`` filters. - + Never extract archives from untrusted sources without prior inspection. It is possible that files are created outside of the path specified in the *extract_dir* argument, e.g. members that have absolute filenames starting with "/" or filenames with two dots "..". + The default filter is set to ``filter='data'`` to prevent the most + dangerous security issues. Read the :ref:`tarfile-extraction-filter` + section for details. + .. versionchanged:: 3.7 Accepts a :term:`path-like object` for *filename* and *extract_dir*. diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index dde91f93b81f64..5c680c983399c6 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -503,18 +503,15 @@ be finalized; only the internally used file object will be closed. See the .. warning:: - The default filter is set to ``filter='data'`` to prevent the most - dangerous security issues. Read the :ref:`tarfile-extraction-filter` - section for details. - - Never extract archives from untrusted sources without prior inspection, - even when using the ``'data'`` filter, but especially if using the - ``'tar'`` or ``'fully_trusted'`` filters. 
- + Never extract archives from untrusted sources without prior inspection. It is possible that files are created outside of *path*, e.g. members that have absolute filenames starting with ``"/"`` or filenames with two dots ``".."``. + The default filter is set to ``filter='data'`` to prevent the most + dangerous security issues. Read the :ref:`tarfile-extraction-filter` + section for details. + .. versionchanged:: 3.5 Added the *numeric_owner* parameter. From 619dc2877f6a76e8921cb52940ed8fbbc3a6ed31 Mon Sep 17 00:00:00 2001 From: Scott Odle Date: Mon, 22 Jul 2024 08:45:35 -0600 Subject: [PATCH 8/9] revert style changes for unrelated tests --- Lib/test/test_tarfile.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 3323c81e03af33..5600f0746770b8 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -694,11 +694,10 @@ def test_extract_hardlink(self): def test_extractall(self): # Test if extractall() correctly restores directory permissions # and times (see issue1735). 
- DIR = pathlib.Path(TEMPDIR) / "extractall" - with ( - os_helper.temp_dir(DIR), - tarfile.open(tarname, encoding="iso8859-1") as tar - ): + tar = tarfile.open(tarname, encoding="iso8859-1") + DIR = os.path.join(TEMPDIR, "extractall") + os.mkdir(DIR) + try: directories = [t for t in tar if t.isdir()] tar.extractall(DIR, directories, filter='fully_trusted') for tarinfo in directories: @@ -719,6 +718,9 @@ def format_mtime(mtime): format_mtime(file_mtime), path) self.assertEqual(tarinfo.mtime, file_mtime, errmsg) + finally: + tar.close() + os_helper.rmtree(DIR) @staticmethod def test_extractall_default_filter(): From ae161baa8614a6b1bb61bfea90f87ebe16c037f5 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 25 Jul 2024 13:33:28 +0200 Subject: [PATCH 9/9] Reword documentation --- Doc/library/shutil.rst | 12 ++++---- Doc/library/tarfile.rst | 64 ++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 2643ef4ecf15eb..220207e5f3cbbf 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -706,8 +706,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. The keyword-only *filter* argument is passed to the underlying unpacking function. For zip files, *filter* is not accepted. - For tar files, it is recommended to use the default, ``'data'``, - unless using features specific to tar and UNIX-like filesystems. + For tar files, it is recommended to use ``'data'`` (default since Python + 3.14), unless using features specific to tar and UNIX-like filesystems. (See :ref:`tarfile-extraction-filter` for details.) .. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive @@ -719,9 +719,11 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. the *extract_dir* argument, e.g. members that have absolute filenames starting with "/" or filenames with two dots "..". 
- The default filter is set to ``filter='data'`` to prevent the most - dangerous security issues. Read the :ref:`tarfile-extraction-filter` - section for details. + Since Python 3.14, the defaults for both built-in formats (zip and tar + files) will prevent the most dangerous of such security issues, + but will not prevent *all* unintended behavior. + Read the :ref:`tarfile-further-verification` + section for tar-specific details. .. versionchanged:: 3.7 Accepts a :term:`path-like object` for *filename* and *extract_dir*. diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 5c680c983399c6..631d869e42d09d 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -43,9 +43,9 @@ Some facts and figures: .. versionchanged:: 3.14 Set the default extraction filter to :func:`data `, - which disallows dangerous features such as links to absolute paths + which disallows some dangerous features such as links to absolute paths or paths outside of the destination. Previously, the filter strategy - was :func:`fully_trusted `. + was equivalent to :func:`fully_trusted `. .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) @@ -498,19 +498,18 @@ be finalized; only the internally used file object will be closed. See the The *filter* argument specifies how ``members`` are modified or rejected before extraction. See :ref:`tarfile-extraction-filter` for details. - It is recommended to set this explicitly only if unusual *tar* features - are required. + It is recommended to set this explicitly only if specific *tar* features + are required, or as ``filter='data'`` to support Python versions with a less + secure default (3.13 and lower). .. warning:: Never extract archives from untrusted sources without prior inspection. - It is possible that files are created outside of *path*, e.g. members - that have absolute filenames starting with ``"/"`` or filenames with two - dots ``".."``. 
- The default filter is set to ``filter='data'`` to prevent the most - dangerous security issues. Read the :ref:`tarfile-extraction-filter` - section for details. + Since Python 3.14, the default (:func:`data <data_filter>`) will prevent + the most dangerous security issues. + However, it will not prevent *all* unintended or insecure behavior. + Read the :ref:`tarfile-extraction-filter` section for details. .. versionchanged:: 3.5 Added the *numeric_owner* parameter. @@ -521,6 +520,9 @@ be finalized; only the internally used file object will be closed. See the .. versionchanged:: 3.12 Added the *filter* parameter. + .. versionchanged:: 3.14 + The *filter* parameter now defaults to ``'data'``. + .. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None) @@ -540,11 +542,8 @@ be finalized; only the internally used file object will be closed. See the .. warning:: - See the warning for :meth:`extractall`. - - The default filter is set to ``filter='data'`` to prevent the most - dangerous security issues. Read the :ref:`tarfile-extraction-filter` - section for details. + Never extract archives from untrusted sources without prior inspection. + See the warning for :meth:`extractall` for details. .. versionchanged:: 3.2 Added the *set_attrs* parameter. @@ -607,9 +606,8 @@ be finalized; only the internally used file object will be closed. See the String names are not allowed for this attribute, unlike the *filter* argument to :meth:`~TarFile.extract`. - If ``extraction_filter`` is ``None`` (the default), - calling an extraction method without a *filter* argument will cause - extraction methods to use the :func:`data <data_filter>` filter by default. + If ``extraction_filter`` is ``None`` (the default), extraction methods + will use the :func:`data <data_filter>` filter by default. The attribute may be set on instances or overridden in subclasses. 
It also is possible to set it on the ``TarFile`` class itself to set a @@ -619,6 +617,14 @@ be finalized; only the internally used file object will be closed. See the To set a global default this way, a filter function needs to be wrapped in :func:`staticmethod()` to prevent injection of a ``self`` argument. + .. versionchanged:: 3.14 + + The default filter is set to :func:`data <data_filter>`, + which disallows some dangerous features such as links to absolute paths + or paths outside of the destination. + Previously, the default was equivalent to + :func:`fully_trusted <fully_trusted_filter>`. + .. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None) Add the file *name* to the archive. *name* may be any type of file @@ -969,6 +975,12 @@ In most cases, the full functionality is not needed. Therefore, *tarfile* supports extraction filters: a mechanism to limit functionality, and thus mitigate some of the security issues. +.. warning:: + + None of the available filters blocks *all* dangerous archive features. + Never extract archives from untrusted sources without prior inspection. + See also :ref:`tarfile-further-verification`. + .. seealso:: :pep:`706` @@ -994,6 +1006,12 @@ can be: If that is also ``None`` (the default), the ``'data'`` filter will be used. + .. versionchanged:: 3.14 + + The default filter is set to :func:`data <data_filter>`. + Previously, the default was equivalent to + :func:`fully_trusted <fully_trusted_filter>`. + * A callable which will be called for each extracted member with a :ref:`TarInfo <tarinfo-objects>` describing the member and the destination path to where the archive is extracted (i.e. the same path is used for all @@ -1075,6 +1093,9 @@ reused in custom filters: Return the modified ``TarInfo`` member. + Note that this filter does not block *all* dangerous archive features. + See :ref:`tarfile-further-verification` for details. + .. _tarfile-extraction-refuse: @@ -1088,6 +1109,8 @@ With ``errorlevel=0`` the error will be logged and the member will be skipped, but extraction will continue. +.. 
_tarfile-further-verification: + Hints for further verification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1105,9 +1128,10 @@ Here is an incomplete list of things to consider: disk, memory and CPU usage. * Check filenames against an allow-list of characters (to filter out control characters, confusables, foreign path separators, - etc.). + and so on). * Check that filenames have expected extensions (discouraging files that - execute when you “click on them”, or extension-less files like Windows special device names). + execute when you “click on them”, or extension-less files like Windows + special device names). * Limit the number of extracted files, total size of extracted data, filename length (including symlink length), and size of individual files. * Check for files that would be shadowed on case-insensitive filesystems.