Skip to content

Commit

Permalink
Merge pull request #774 from ChrisCummins/fix/CVE-2007-4559
Browse files Browse the repository at this point in the history
Refactored CVE-2007-4559 Patch
  • Loading branch information
ChrisCummins authored Nov 2, 2022
2 parents 72fb339 + fe1cf21 commit b017c81
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 3 deletions.
3 changes: 2 additions & 1 deletion compiler_gym/envs/llvm/datasets/cbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from compiler_gym.third_party import llvm
from compiler_gym.util.commands import Popen
from compiler_gym.util.download import download
from compiler_gym.util.filesystem import extract_tar
from compiler_gym.util.runfiles_path import cache_path, site_data_path
from compiler_gym.util.timer import Timer

Expand Down Expand Up @@ -242,7 +243,7 @@ def download_cBench_runtime_data() -> bool:
tar_contents = io.BytesIO(download(url, sha256))
with tarfile.open(fileobj=tar_contents, mode="r:bz2") as tar:
cbench_data.parent.mkdir(parents=True, exist_ok=True)
tar.extractall(cbench_data.parent)
extract_tar(tar, cbench_data.parent)
assert cbench_data.is_dir()
# Create the marker file to indicate that the directory is unpacked
# and ready to go.
Expand Down
3 changes: 2 additions & 1 deletion compiler_gym/third_party/llvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from fasteners import InterProcessLock

from compiler_gym.util.download import download
from compiler_gym.util.filesystem import extract_tar
from compiler_gym.util.runfiles_path import cache_path, site_data_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -50,7 +51,7 @@ def _download_llvm_files(destination: Path) -> Path:
tar_contents = io.BytesIO(download(_LLVM_URL, sha256=_LLVM_SHA256))
destination.parent.mkdir(parents=True, exist_ok=True)
with tarfile.open(fileobj=tar_contents, mode="r:bz2") as tar:
tar.extractall(destination)
extract_tar(tar, destination)

assert destination.is_dir()
assert (destination / "LICENSE").is_file()
Expand Down
18 changes: 18 additions & 0 deletions compiler_gym/util/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,21 @@ def is_in_memory(path: Path) -> bool:
# transient_cache_path() case. There will be false negatives, though not
# likely false positives.
return str(path).startswith("/dev/shm")


def is_within_directory(directory, target) -> bool:
    """Return whether `target` is `directory` itself or located beneath it.

    Both paths are made absolute and normalized with `os.path.abspath`, so
    `..` components are collapsed before the comparison. Symbolic links are
    not resolved.

    :param directory: The directory that is expected to contain `target`.
    :param target: The path to test.
    :return: `True` if `target` is equal to, or nested under, `directory`.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    # Compare whole path components. os.path.commonprefix() works character
    # by character, so it would report "/data/foobar" as being inside
    # "/data/foo" because the strings share the prefix "/data/foo".
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths have nothing in common to compare, e.g. they
        # are on different drives on Windows. Such a target is not inside.
        return False

    return common == abs_directory


def extract_tar(tar, path=".", members=None, *, numeric_owner=False) -> None:
    """Extract `tar` into `path`, refusing archives that escape `path`.

    Every member returned by `tar.getmembers()` is checked, regardless of the
    `members` argument: its name is joined onto `path` and the result must
    pass `is_within_directory()`. Extraction only begins once all members
    have been checked.

    :param tar: An object providing `getmembers()` and `extractall()`.
    :param path: The directory to extract into.
    :param members: Forwarded unchanged to `tar.extractall()`.
    :param numeric_owner: Forwarded unchanged to `tar.extractall()`.
    :raises Exception: If any member's joined path is not within `path`.
    """
    # any() stops at the first offending entry, so no further members are
    # inspected once a traversal attempt has been found.
    has_escaping_entry = any(
        not is_within_directory(path, os.path.join(path, entry.name))
        for entry in tar.getmembers()
    )
    if has_escaping_entry:
        raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)
3 changes: 2 additions & 1 deletion examples/gnn_cost_model/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import compiler_gym.util.flags.nproc # noqa flag definition
from compiler_gym.util.download import download
from compiler_gym.util.filesystem import extract_tar
from compiler_gym.util.runfiles_path import cache_path, transient_cache_path
from compiler_gym.util.timer import Timer, humanize_duration

Expand Down Expand Up @@ -145,7 +146,7 @@ def download_and_unpack_database(db: str, sha256: str) -> Path:
local_dir.mkdir(parents=True, exist_ok=True)
logger.info("Unpacking database to %s ...", local_dir)
with tarfile.open(fileobj=tar_data, mode="r:bz2") as arc:
arc.extractall(str(local_dir))
extract_tar(arc, str(local_dir))

(local_dir / ".installed").touch()

Expand Down

0 comments on commit b017c81

Please sign in to comment.