def walk(topdir: str) -> Generator[tuple[str, list[str], list[str]]]:
    """Iterate over a directory tree like `os.walk`, in reproducible order

    Directory and file names are sorted by name. Two exceptions apply
    directly below the root: ``*.dist-info`` subdirectories are visited
    after every other subdirectory, and inside such a directory a file
    named ``RECORD`` is listed after every other file.

    Parameters
    ----------
    topdir : str
        Root of the directory tree

    Yields
    ------
    dirpath : str
        Path to a directory
    dirnames : list[str]
        List of subdirectory names in `dirpath`
    filenames : list[str]
        List of non-directory file names in `dirpath`
    """
    root = os.path.normpath(topdir)
    dist_info_suffix = ".dist-info"

    for current, subdir_names, file_names in os.walk(root):
        # The lists handed out by `os.walk` must be reordered in place:
        # the order of `subdir_names` decides the order of recursion.
        if current == root:
            # False sorts before True, so .dist-info directories end up
            # last while both groups stay ordered by name.
            subdir_names.sort(
                key=lambda name: (name.endswith(dist_info_suffix), name)
            )
        else:
            subdir_names.sort()

        file_names.sort()

        # Only a .dist-info directory sitting directly under the root
        # gets its RECORD file moved to the end of the listing.
        is_top_level_dist_info = (
            current.endswith(dist_info_suffix)
            and os.path.dirname(current) == root
        )
        if is_top_level_dist_info and "RECORD" in file_names:
            file_names.remove("RECORD")
            file_names.append("RECORD")

        yield current, subdir_names, file_names
in_dir = os.path.normpath(in_dir) if date_time is None: st = os.stat(in_dir) date_time = datetime.fromtimestamp(st.st_mtime, tz=timezone.utc) date_time_args = date_time.timetuple()[:6] compression = zipfile.ZIP_DEFLATED with zipfile.ZipFile(zip_fname, "w", compression=compression) as z: - for root, dirs, files in os.walk(in_dir): - for dir in dirs: - dname = os.path.join(root, dir) + for root, dirs, files in walk(in_dir): + if root != in_dir and not (dirs or files): + dname = root out_dname = os.path.relpath(dname, in_dir) + "/" zinfo = zipfile.ZipInfo.from_file(dname, out_dname) zinfo.date_time = date_time_args diff --git a/src/auditwheel/wheeltools.py b/src/auditwheel/wheeltools.py index 063d3493..70180b89 100644 --- a/src/auditwheel/wheeltools.py +++ b/src/auditwheel/wheeltools.py @@ -25,7 +25,7 @@ from ._vendor.wheel.pkginfo import read_pkg_info, write_pkg_info from .tmpdirs import InTemporaryDirectory -from .tools import dir2zip, unique_by_index, zip2dir +from .tools import dir2zip, unique_by_index, walk, zip2dir logger = logging.getLogger(__name__) @@ -69,10 +69,10 @@ def rewrite_record(bdist_dir: str) -> None: if exists(sig_path): os.unlink(sig_path) - def walk() -> Generator[str]: - for dir, dirs, files in os.walk(bdist_dir): - for f in files: - yield pjoin(dir, f) + def files() -> Generator[str]: + for dir, _, files in walk(bdist_dir): + for file in files: + yield pjoin(dir, file) def skip(path: str) -> bool: """Wheel hashes every possible file.""" @@ -80,7 +80,7 @@ def skip(path: str) -> bool: with open(record_path, "w+", newline="", encoding="utf-8") as record_file: writer = csv.writer(record_file) - for path in walk(): + for path in files(): relative_path = relpath(path, bdist_dir) if skip(relative_path): hash_ = ""