Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Walk directories in sorted order for reproducibility #517

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 49 additions & 4 deletions src/auditwheel/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import subprocess
import zipfile
from collections.abc import Iterable
from collections.abc import Generator, Iterable
from datetime import datetime, timezone
from typing import Any

Expand All @@ -29,6 +29,50 @@ def unique_by_index(sequence: Iterable[Any]) -> list[Any]:
return uniques


def walk(topdir: str) -> Generator[tuple[str, list[str], list[str]]]:
    """Wrapper for `os.walk` with outputs in reproducible order

    Directory and file names are sorted so that the traversal order does
    not depend on the order in which the file system lists entries.
    Two additional ordering rules apply:

    * directly under `topdir`, subdirectories whose name ends with
      ``.dist-info`` are visited after all other subdirectories;
    * inside such a top-level ``.dist-info`` directory, a file named
      ``RECORD`` is listed after all other files.

    Parameters
    ----------
    topdir : str
        Root of the directory tree

    Yields
    ------
    dirpath : str
        Path to a directory
    dirnames : list[str]
        List of subdirectory names in `dirpath`
    filenames : list[str]
        List of non-directory file names in `dirpath`
    """
    # normalise so that the `dirpath == topdir` and `dirname(dirpath) == topdir`
    # comparisons below are not defeated by e.g. a trailing separator
    topdir = os.path.normpath(topdir)
    for dirpath, dirnames, filenames in os.walk(topdir):
        # `dirnames` must be reordered in-place: `os.walk` recurses into the
        # subdirectories in the order of this very list object
        if dirpath == topdir:
            # the boolean key component sorts False before True, which pushes
            # the top-level .dist-info directories after all the others while
            # keeping both groups in alphabetical order
            dirnames.sort(key=lambda name: (name.endswith(".dist-info"), name))
        else:
            dirnames.sort()

        # sort list of filenames for iteration in reproducible order
        filenames.sort()

        # list any top-level dist-info/RECORD file last
        in_top_level_dist_info = (
            dirpath.endswith(".dist-info") and os.path.dirname(dirpath) == topdir
        )
        if in_top_level_dist_info and "RECORD" in filenames:
            filenames.remove("RECORD")
            filenames.append("RECORD")

        yield dirpath, dirnames, filenames


def zip2dir(zip_fname: str, out_dir: str) -> None:
"""Extract `zip_fname` into output directory `out_dir`

Expand Down Expand Up @@ -69,15 +113,16 @@ def dir2zip(in_dir: str, zip_fname: str, date_time: datetime | None = None) -> N
date_time : Optional[datetime]
Time stamp to set on each file in the archive
"""
in_dir = os.path.normpath(in_dir)
if date_time is None:
st = os.stat(in_dir)
date_time = datetime.fromtimestamp(st.st_mtime, tz=timezone.utc)
date_time_args = date_time.timetuple()[:6]
compression = zipfile.ZIP_DEFLATED
with zipfile.ZipFile(zip_fname, "w", compression=compression) as z:
for root, dirs, files in os.walk(in_dir):
for dir in dirs:
dname = os.path.join(root, dir)
for root, dirs, files in walk(in_dir):
if root != in_dir:
mayeut marked this conversation as resolved.
Show resolved Hide resolved
dname = root
out_dname = os.path.relpath(dname, in_dir) + "/"
zinfo = zipfile.ZipInfo.from_file(dname, out_dname)
zinfo.date_time = date_time_args
Expand Down
12 changes: 6 additions & 6 deletions src/auditwheel/wheeltools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from ._vendor.wheel.pkginfo import read_pkg_info, write_pkg_info
from .tmpdirs import InTemporaryDirectory
from .tools import dir2zip, unique_by_index, zip2dir
from .tools import dir2zip, unique_by_index, walk, zip2dir

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -69,18 +69,18 @@ def rewrite_record(bdist_dir: str) -> None:
if exists(sig_path):
os.unlink(sig_path)

def walk() -> Generator[str]:
for dir, dirs, files in os.walk(bdist_dir):
for f in files:
yield pjoin(dir, f)
def files() -> Generator[str]:
for dir, _, files in walk(bdist_dir):
for file in files:
yield pjoin(dir, file)

def skip(path: str) -> bool:
"""Wheel hashes every possible file."""
return path == record_relpath

with open(record_path, "w+", newline="", encoding="utf-8") as record_file:
writer = csv.writer(record_file)
for path in walk():
for path in files():
relative_path = relpath(path, bdist_dir)
if skip(relative_path):
hash_ = ""
Expand Down
Loading