Skip to content

Commit

Permalink
Use wheels for gathering licenses
Browse files Browse the repository at this point in the history
This makes the license gathering process *much* faster, and also avoids
any bootstrapping issues that source distributions may have.
  • Loading branch information
pradyunsg committed Oct 12, 2021
1 parent 7b7237d commit f89b90c
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 62 deletions.
77 changes: 26 additions & 51 deletions src/vendoring/tasks/license.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
import shutil
import tarfile
import tempfile
import zipfile
from pathlib import Path
from typing import Dict, Iterable, Union
from typing import Dict, Iterable

import requests

from vendoring.configuration import Configuration
from vendoring.ui import UI
from vendoring.utils import run

SDistMember = Union[tarfile.TarInfo, zipfile.ZipInfo]
SDistArchive = Union[tarfile.TarFile, zipfile.ZipFile]


def download_sdists(location: Path, requirements: Path) -> None:
def download_wheels(location: Path, requirements: Path) -> None:
cmd = [
"pip",
"download",
"-r",
str(requirements),
"--no-binary",
"--only-binary",
":all:",
"--no-deps",
"--dest",
Expand All @@ -42,12 +38,12 @@ def get_library_name_from_directory(dirname: str) -> str:

def extract_license_member(
destination: Path,
tar: SDistArchive,
member: SDistMember,
wheel: zipfile.ZipFile,
member: zipfile.ZipInfo,
name: str,
license_directories: Dict[str, str],
) -> None:
mpath = Path(name) # relative path inside the sdist
mpath = Path(name) # relative path inside the wheel

dirname = list(mpath.parents)[-2].name # -1 is .
libname = get_library_name_from_directory(dirname)
Expand All @@ -57,26 +53,18 @@ def extract_license_member(
)

UI.log("Extracting {} into {}".format(name, dest.relative_to(destination)))
try:
fileobj = tar.extractfile(member) # type: ignore
dest.write_bytes(fileobj.read()) # type: ignore
except AttributeError: # zipfile
dest.write_bytes(tar.read(member)) # type: ignore
dest.write_bytes(wheel.read(member))


def find_and_extract_license(
destination: Path,
tar: SDistArchive,
members: Iterable[SDistMember],
tar: zipfile.ZipFile,
members: Iterable[zipfile.ZipInfo],
license_directories: Dict[str, str],
) -> bool:
found = False
for member in members:
try:
name = member.name # type: ignore
except AttributeError: # zipfile
name = member.filename # type: ignore
name = member.filename
if "LICENSE" in name or "COPYING" in name:
if "/test" in name:
# some testing licenses in html5lib and distlib
Expand Down Expand Up @@ -112,12 +100,12 @@ def download_from_url(url: str, dest: Path) -> None:

def get_license_fallback(
destination: Path,
sdist_name: str,
wheel_name: str,
license_directories: Dict[str, str],
license_fallback_urls: Dict[str, str],
) -> None:
"""Hardcoded license URLs. Check when updating if those are still needed"""
libname = get_library_name_from_directory(sdist_name)
libname = get_library_name_from_directory(wheel_name)
if libname not in license_fallback_urls:
raise ValueError("No hardcoded URL for {} license".format(libname))

Expand All @@ -128,38 +116,25 @@ def get_license_fallback(
download_from_url(url, dest)


def extract_license_from_sdist(
def extract_license_from_wheel(
destination: Path,
sdist: Path,
wheel: Path,
license_directories: Dict[str, str],
license_fallback_urls: Dict[str, str],
) -> None:
def extract_from_source_tarfile(sdist: Path) -> bool:
ext = sdist.suffixes[-1][1:]
with tarfile.open(sdist, mode="r:{}".format(ext)) as tar:
return find_and_extract_license(
destination, tar, tar.getmembers(), license_directories
)

def extract_from_source_zipfile(sdist: Path) -> bool:
with zipfile.ZipFile(sdist) as zip:
return find_and_extract_license(
destination, zip, zip.infolist(), license_directories
)

if sdist.suffixes[-2:-1] == [".tar"]:
found = extract_from_source_tarfile(sdist)
elif sdist.suffixes[-1] == ".zip":
found = extract_from_source_zipfile(sdist)
else:
raise NotImplementedError("new sdist type!")
assert wheel.suffix == ".whl"

with zipfile.ZipFile(wheel) as zip:
found = find_and_extract_license(
destination, zip, zip.infolist(), license_directories
)

if found:
return

UI.log("License not found in {}".format(sdist.name))
UI.log("License not found in {}".format(wheel.name))
get_license_fallback(
destination, sdist.name, license_directories, license_fallback_urls
destination, wheel.name, license_directories, license_fallback_urls
)


Expand All @@ -170,11 +145,11 @@ def fetch_licenses(config: Configuration) -> None:
requirements = config.requirements

tmp_dir = Path(tempfile.gettempdir(), "vendoring-downloads")
download_sdists(tmp_dir, requirements)
download_wheels(tmp_dir, requirements)

for sdist in tmp_dir.iterdir():
extract_license_from_sdist(
destination, sdist, license_directories, license_fallback_urls
for wheel in tmp_dir.iterdir():
extract_license_from_wheel(
destination, wheel, license_directories, license_fallback_urls
)

shutil.rmtree(tmp_dir)
2 changes: 1 addition & 1 deletion tests/sample-projects/import_rewriting/vendor.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
retrying==1.3.3
packaging==20.4
six==1.15.0
3 changes: 0 additions & 3 deletions tests/sample-projects/licenses/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,5 @@ destination = "vendored/"
requirements = "vendor.txt"
namespace = "licenses.vendored"

[tool.vendoring.license.directories]
msgpack-python = "msgpack"

[tool.vendoring.license.fallback-urls]
webencodings = "https://github.com/SimonSapin/python-webencodings/raw/master/LICENSE"
2 changes: 1 addition & 1 deletion tests/sample-projects/licenses/vendor.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
appdirs==1.4.4
six==1.15.0
msgpack==1.0.0
tomli==1.2.1
webencodings==0.5.1
22 changes: 16 additions & 6 deletions tests/test_sample_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,25 @@ def test_import_rewriting(tmp_path, monkeypatch):
vendored = tmp_path / "vendored"
assert vendored.exists()
assert sorted(os.listdir(vendored)) == [
"retrying.LICENSE",
"retrying.py",
"retrying.pyi",
"packaging",
"six.LICENSE",
"six.py",
"six.pyi",
]

interesting_file = vendored / "packaging" / "requirements.py"
interesting_lineno = 12
with interesting_file.open() as f:
iterable = iter(f)
for _ in range(interesting_lineno - 1):
next(iterable)
interesting_line = next(iterable)

expected_line = (
"from import_rewriting.vendored.six.moves.urllib import parse as urlparse\n"
)
assert interesting_line == expected_line


def test_licenses(tmp_path, monkeypatch):
shutil.copytree(SAMPLE_PROJECTS / "licenses", tmp_path, dirs_exist_ok=True)
Expand All @@ -91,16 +102,15 @@ def test_licenses(tmp_path, monkeypatch):
"appdirs.LICENSE.txt",
"appdirs.py",
"appdirs.pyi",
"msgpack",
"msgpack.pyi",
"six.LICENSE",
"six.py",
"six.pyi",
"tomli",
"webencodings",
"webencodings.pyi",
]

assert (vendored / "msgpack" / "COPYING").exists()
assert (vendored / "tomli" / "LICENSE").exists()
assert (vendored / "webencodings" / "LICENSE").exists()


Expand Down

0 comments on commit f89b90c

Please sign in to comment.