From 33bc8e62899223d3e0d5abf8ec6d8af76c10b07e Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Tue, 25 Jun 2024 13:53:10 +0000 Subject: [PATCH 1/5] Specify filter='data' for tarball extractions --- conda_build/convert.py | 5 ++--- conda_build/render.py | 2 +- conda_build/utils.py | 26 ++++---------------------- 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/conda_build/convert.py b/conda_build/convert.py index e910d47e21..6064423503 100644 --- a/conda_build/convert.py +++ b/conda_build/convert.py @@ -131,9 +131,8 @@ def extract_temporary_directory(file_path): """ temporary_directory = tempfile.mkdtemp() - source = tarfile.open(file_path) - source.extractall(temporary_directory) - source.close() + with tarfile.open(file_path) as tar: + tar.extractall(temporary_directory, filter="data") return temporary_directory diff --git a/conda_build/render.py b/conda_build/render.py index 0c80df0005..7e1fd79a8d 100644 --- a/conda_build/render.py +++ b/conda_build/render.py @@ -934,7 +934,7 @@ def open_recipe(recipe: str | os.PathLike | Path) -> Iterator[Path]: elif recipe.suffixes in [[".tar"], [".tar", ".gz"], [".tgz"], [".tar", ".bz2"]]: # extract the recipe to a temporary directory with TemporaryDirectory() as tmp, tarfile.open(recipe, "r:*") as tar: - tar.extractall(path=tmp) + tar.extractall(path=tmp, filter="data") yield Path(tmp) elif recipe.suffix == ".yaml": # read the recipe from the parent directory diff --git a/conda_build/utils.py b/conda_build/utils.py index e5e8d9c8c2..1826ab3815 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -783,32 +783,14 @@ def _tar_xf_fallback(tarball, dir_path, mode="r:*"): from .os_utils.external import find_executable if tarball.lower().endswith(".tar.z"): - uncompress = find_executable("uncompress") + uncompress = find_executable("uncompress") or find_executable("gunzip") if not uncompress: - uncompress = find_executable("gunzip") - if not uncompress: - sys.exit( - """\ -uncompress (or gunzip) is required to unarchive .z source files. -""" - ) + sys.exit("uncompress/gunzip is required to unarchive .z source files.") check_call_env([uncompress, "-f", tarball]) tarball = tarball[:-2] - t = tarfile.open(tarball, mode) - members = t.getmembers() - for i, member in enumerate(members, 0): - if os.path.isabs(member.name): - member.name = os.path.relpath(member.name, "/") - cwd = os.path.realpath(os.getcwd()) - if not os.path.realpath(member.name).startswith(cwd): - member.name = member.name.replace("../", "") - if not os.path.realpath(member.name).startswith(cwd): - sys.exit("tarball contains unsafe path: " + member.name + " cwd is: " + cwd) - members[i] = member - - t.extractall(path=dir_path) - t.close() + with tarfile.open(tarball, mode) as tar: + tar.extractall(path=dir_path, filter="data") def tar_xf_file(tarball, entries): From e4b84f39063a1b7b217a90850a6a9730d9afce86 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Tue, 25 Jun 2024 14:29:49 +0000 Subject: [PATCH 2/5] Adjust filterwarnings to specifically target CPS --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2726b59495..c3c2aaed6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,7 +129,8 @@ filterwarnings = [ # ignore conda-index error "ignore::PendingDeprecationWarning:conda_index", "ignore::DeprecationWarning:conda_index", - "ignore:Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata:DeprecationWarning", + # ignore conda-package-streaming error + "ignore:Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata:DeprecationWarning:conda_package_streaming", ] markers = [ "serial: execute test serially (to avoid race conditions)", From d9f416fab60cfff4cc12fabd90a606466d4bf16a Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 26 Jun 2024 07:47:54 +0000 Subject: [PATCH 3/5] Add news --- news/5390-tarfile-extract-data | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 news/5390-tarfile-extract-data diff --git a/news/5390-tarfile-extract-data b/news/5390-tarfile-extract-data new file mode 100644 index 0000000000..8224ddf82b --- /dev/null +++ b/news/5390-tarfile-extract-data @@ -0,0 +1,19 @@ +### Enhancements + +* Use `tarfile.TarFile.extract[all](filter='data')` for improved tarball extraction security (e.g., disallow paths outside of or linking outside of the destination, remove group & other write/executable permissions, etc.). (#5390) + +### Bug fixes + +* + +### Deprecations + +* + +### Docs + +* + +### Other + +* From c9272d0f0e0cf9311fe76a0a0bdb4f23624c0dbc Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 26 Jun 2024 08:47:59 +0000 Subject: [PATCH 4/5] Duck typing for non-backported versions of Python --- conda_build/convert.py | 7 ++++++- conda_build/render.py | 7 ++++++- conda_build/utils.py | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/conda_build/convert.py b/conda_build/convert.py index 6064423503..f1094172d3 100644 --- a/conda_build/convert.py +++ b/conda_build/convert.py @@ -132,7 +132,12 @@ def extract_temporary_directory(file_path): temporary_directory = tempfile.mkdtemp() with tarfile.open(file_path) as tar: - tar.extractall(temporary_directory, filter="data") + # FUTURE: Python 3.12+, remove try-except + try: + tar.extractall(path=temporary_directory, filter="data") + except TypeError: + # TypeError: `filter` is unsupported in this Python version + tar.extractall(path=temporary_directory) return temporary_directory diff --git a/conda_build/render.py b/conda_build/render.py index 7e1fd79a8d..a00ed1bf9c 100644 --- a/conda_build/render.py +++ b/conda_build/render.py @@ -934,7 +934,12 @@ def open_recipe(recipe: str | os.PathLike | Path) -> Iterator[Path]: elif recipe.suffixes in [[".tar"], [".tar", ".gz"], [".tgz"], [".tar", ".bz2"]]: # extract the recipe to a temporary directory with TemporaryDirectory() as tmp, tarfile.open(recipe, "r:*") as tar: - tar.extractall(path=tmp, filter="data") + # FUTURE: Python 3.12+, remove try-except + try: + tar.extractall(path=tmp, filter="data") + except TypeError: + # TypeError: `filter` is unsupported in this Python version + tar.extractall(path=tmp) yield Path(tmp) elif recipe.suffix == ".yaml": # read the recipe from the parent directory diff --git a/conda_build/utils.py b/conda_build/utils.py index 1826ab3815..87c7f8957d 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -790,7 +790,12 @@ def _tar_xf_fallback(tarball, dir_path, mode="r:*"): tarball = tarball[:-2] with tarfile.open(tarball, mode) as tar: - tar.extractall(path=dir_path, filter="data") + # FUTURE: Python 3.12+, remove try-except + try: + tar.extractall(path=dir_path, filter="data") + except TypeError: + # TypeError: `filter` is unsupported in this Python version + tar.extractall(path=dir_path) def tar_xf_file(tarball, entries): From af5856978a013a7b7b7f18fb5037b117c2a96f50 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 26 Jun 2024 09:37:30 +0000 Subject: [PATCH 5/5] Switch from tarfile to CPH --- conda_build/convert.py | 16 +++++----------- conda_build/render.py | 10 +++------- conda_build/utils.py | 23 +++++------------------ 3 files changed, 13 insertions(+), 36 deletions(-) diff --git a/conda_build/convert.py b/conda_build/convert.py index f1094172d3..2104cf9d69 100644 --- a/conda_build/convert.py +++ b/conda_build/convert.py @@ -18,6 +18,8 @@ from pathlib import Path from typing import TYPE_CHECKING +from conda_package_handling.api import extract + from .utils import ensure_list, filter_info_files, walk if TYPE_CHECKING: @@ -129,17 +131,9 @@ def extract_temporary_directory(file_path): Positional arguments: file_path (str) -- the file path to the source package tar file """ - temporary_directory = tempfile.mkdtemp() - - with tarfile.open(file_path) as tar: - # FUTURE: Python 3.12+, remove try-except - try: - tar.extractall(path=temporary_directory, filter="data") - except TypeError: - # TypeError: `filter` is unsupported in this Python version - tar.extractall(path=temporary_directory) - - return temporary_directory + tmp = tempfile.mkdtemp() + extract(file_path, dest_dir=tmp) + return tmp def update_dependencies(new_dependencies, existing_dependencies): diff --git a/conda_build/render.py b/conda_build/render.py index a00ed1bf9c..7c737348d7 100644 --- a/conda_build/render.py +++ b/conda_build/render.py @@ -31,6 +31,7 @@ from conda.gateways.disk.create import TemporaryDirectory from conda.models.records import PackageRecord from conda.models.version import VersionOrder +from conda_package_handling.api import extract from . import environ, exceptions, source, utils from .exceptions import DependencyNeedsBuildingError @@ -933,13 +934,8 @@ def open_recipe(recipe: str | os.PathLike | Path) -> Iterator[Path]: yield recipe elif recipe.suffixes in [[".tar"], [".tar", ".gz"], [".tgz"], [".tar", ".bz2"]]: # extract the recipe to a temporary directory - with TemporaryDirectory() as tmp, tarfile.open(recipe, "r:*") as tar: - # FUTURE: Python 3.12+, remove try-except - try: - tar.extractall(path=tmp, filter="data") - except TypeError: - # TypeError: `filter` is unsupported in this Python version - tar.extractall(path=tmp) + with TemporaryDirectory() as tmp: + extract(recipe, dest_dir=tmp) yield Path(tmp) elif recipe.suffix == ".yaml": # read the recipe from the parent directory diff --git a/conda_build/utils.py b/conda_build/utils.py index 87c7f8957d..12b2040d14 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -15,7 +15,6 @@ import stat import subprocess import sys -import tarfile import tempfile import time import urllib.parse as urlparse @@ -44,7 +43,6 @@ from threading import Thread from typing import TYPE_CHECKING, Iterable, overload -import conda_package_handling.api import filelock import libarchive import yaml @@ -65,6 +63,7 @@ from conda.models.records import PackageRecord from conda.models.version import VersionOrder from conda.utils import unix_path_to_win +from conda_package_handling.api import extract from .exceptions import BuildLockError @@ -446,9 +445,7 @@ def get_recipe_abspath(recipe): ): recipe_dir = tempfile.mkdtemp() if recipe.lower().endswith(CONDA_PACKAGE_EXTENSIONS): - import conda_package_handling.api - - conda_package_handling.api.extract(recipe, recipe_dir) + extract(recipe, dest_dir=recipe_dir) else: tar_xf(recipe, recipe_dir) # At some stage the old build system started to tar up recipes. @@ -789,13 +786,7 @@ def _tar_xf_fallback(tarball, dir_path, mode="r:*"): check_call_env([uncompress, "-f", tarball]) tarball = tarball[:-2] - with tarfile.open(tarball, mode) as tar: - # FUTURE: Python 3.12+, remove try-except - try: - tar.extractall(path=dir_path, filter="data") - except TypeError: - # TypeError: `filter` is unsupported in this Python version - tar.extractall(path=dir_path) + extract(tarball, dest_dir=dir_path) def tar_xf_file(tarball, entries): @@ -1123,13 +1114,9 @@ def package_has_file(package_path, file_path, refresh_mode="modified"): # This version does nothing to the package cache. with TemporaryDirectory() as td: if file_path.startswith("info"): - conda_package_handling.api.extract( - package_path, dest_dir=td, components="info" - ) + extract(package_path, dest_dir=td, components="info") else: - conda_package_handling.api.extract( - package_path, dest_dir=td, components=file_path - ) + extract(package_path, dest_dir=td, components=file_path) resolved_file_path = os.path.join(td, file_path) if os.path.exists(resolved_file_path): # TODO :: Remove this text-mode load. Files are binary.