Skip to content

Commit

Permalink
implement settings driven download and high level resolver customization
Browse files Browse the repository at this point in the history
  • Loading branch information
Shubh Bapna committed Jul 19, 2024
1 parent 1566623 commit c98eaf0
Show file tree
Hide file tree
Showing 12 changed files with 249 additions and 72 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ jobs:
- build
- build_order
- build_steps
- build_settings
- override
- prebuilt_wheels_alt_server
- report_missing_dependency
Expand Down
4 changes: 4 additions & 0 deletions .mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ pull_request_rules:
- check-success=e2e (3.11, 1.75, build)
- check-success=e2e (3.12, 1.75, build)

- check-success=e2e (3.10, 1.75, build_settings)
- check-success=e2e (3.11, 1.75, build_settings)
- check-success=e2e (3.12, 1.75, build_settings)

- check-success=e2e (3.10, 1.75, build_order)
- check-success=e2e (3.11, 1.75, build_order)
- check-success=e2e (3.12, 1.75, build_order)
Expand Down
19 changes: 12 additions & 7 deletions docs/customization.md
Original file line number Diff line number Diff line change
Expand Up @@ -427,13 +427,18 @@ def post_build(

## Customizations using settings.yaml

To use predefined urls to download sources from, instead of overriding the entire `download_source` function, a mapping of package to download source url can be provided directly in settings.yaml:
To use predefined urls to download sources from, instead of overriding the entire `download_source` function, a mapping of package to download source url can be provided directly in settings.yaml. Optionally the downloaded sdist can be renamed. Both the url and the destination filename support templating. The only supported template variable is `version` - it is replaced by the version returned by the resolver.

Additionally, the source distribution index server used by the package resolver can be overriden for a particular package. The resolver can also be told to whether include wheels or sdist sources while trying to resolve the package. Templating is not supported here.

```yaml
download_source:
torch:
url: "https://github.com/pytorch/pytorch/releases/download/v${version}/pytorch-v${version}.tar.gz"
rename_to: "torch-${version}.tar.gz"
packages:
torch:
download_source:
url: "https://github.com/pytorch/pytorch/releases/download/v${version}/pytorch-v${version}.tar.gz"
destination_filename: "torch-${version}.tar.gz"
resolver_dist:
sdist_server_url: "https://pypi.org/simple"
include_wheels: true
include_sdist: false
```
User can define a predefined url for a package from which all its sources will be downloaded from. Optionally they can rename the downloaded sdist to whatever they want. The only supported template variable is `version` - it is replaced by the version returned by the resolver.
9 changes: 9 additions & 0 deletions e2e/build_settings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
packages:
stevedore:
download_source:
url: https://files.pythonhosted.org/packages/e7/c1/b210bf1071c96ecfcd24c2eeb4c828a2a24bf74b38af13896d02203b1eec/stevedore-${version}.tar.gz
destination_filename: stevedore-custom-${version}.tar.gz
resolver_dist:
sdist_server_url: "https://pypi.org/simple"
include_sdist: false
include_wheels: true
76 changes: 76 additions & 0 deletions e2e/test_build_settings.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
# -*- indent-tabs-mode: nil; tab-width: 2; sh-indentation: 2; -*-

# Test to show we can build a single wheel, if the dependencies are
# available.

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

set -x
set -e
set -o pipefail

# Bootstrap to create the build order file.
OUTDIR="$(dirname "$SCRIPTDIR")/e2e-output"

# What are we building?
DIST="stevedore"
VERSION="5.2.0"

# Recreate output directory
rm -rf "$OUTDIR"
mkdir -p "$OUTDIR/build-logs"

# Set up virtualenv with the CLI and dependencies.
tox -e e2e -n -r
source ".tox/e2e/bin/activate"
pip install e2e/post_build_hook

# Bootstrap the test project
fromager \
--sdists-repo="$OUTDIR/sdists-repo" \
--wheels-repo="$OUTDIR/wheels-repo" \
--work-dir="$OUTDIR/work-dir" \
bootstrap "${DIST}==${VERSION}"

# Remove traces of stevedore
rm "$OUTDIR/wheels-repo/downloads/${DIST}"*
rm "$OUTDIR/sdists-repo/downloads/${DIST}"*
rm -r "$OUTDIR/work-dir/${DIST}"*

# Rebuild the wheel mirror without stevedore
pypi-mirror create -d "$OUTDIR/wheels-repo/downloads/" -m "$OUTDIR/wheels-repo/simple/"

# Rebuild the wheel
fromager \
--log-file="$OUTDIR/build.log" \
--sdists-repo="$OUTDIR/sdists-repo" \
--wheels-repo="$OUTDIR/wheels-repo" \
--work-dir="$OUTDIR/work-dir" \
--settings-file="$SCRIPTDIR/build_settings.yaml" \
build "${DIST}" "${VERSION}" "unreachable_url"

EXPECTED_FILES="
wheels-repo/build/stevedore-5.2.0-py3-none-any.whl
sdists-repo/downloads/stevedore-custom-5.2.0.tar.gz
sdists-repo/builds/stevedore-5.2.0.tar.gz
sdists-repo/builds/test-output-file.txt
build.log
"

pass=true
for f in $EXPECTED_FILES; do
if [ ! -f "$OUTDIR/$f" ]; then
echo "FAIL: Did not find $OUTDIR/$f" 1>&2
pass=false
fi
done

if $pass; then
if ! grep -q "${DIST}==${VERSION}" $OUTDIR/sdists-repo/builds/test-output-file.txt; then
echo "FAIL: Did not find content in post-build hook output file" 1>&2
pass=false
fi
fi

$pass
2 changes: 1 addition & 1 deletion src/fromager/commands/list_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ def list_overrides(
):
"""List all of the packages with overrides in the current configuration."""
for name in overrides.list_all(
wkctx.patches_dir, wkctx.envs_dir, wkctx.settings.download_source()
wkctx.patches_dir, wkctx.envs_dir, wkctx.settings.package()
):
print(name)
4 changes: 2 additions & 2 deletions src/fromager/overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def find_override_method(distname: str, method: str) -> typing.Callable:
def list_all(
patches_dir: pathlib.Path,
envs_dir: pathlib.Path,
download_source: dict[str, dict[str, str]],
settings: dict[str, dict],
test: bool = False,
):
exts = _get_extensions()
Expand Down Expand Up @@ -187,7 +187,7 @@ def env_projects():
yield item.stem

def projects_with_predefined_download_source():
yield from download_source
yield from settings

# Use canonicalize_name() to ensure we can correctly remove duplicate
# entries from the return list.
Expand Down
64 changes: 52 additions & 12 deletions src/fromager/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,58 @@ def pre_built(self, variant: str) -> set[str]:
names = p.get(variant) or []
return set(overrides.pkgname_to_override_module(n) for n in names)

def download_source(self) -> dict[str, dict[str, str]]:
return self._data.get("download_source") or {}

def sdist_download_url(self, pkg: str) -> str | None:
p = self.download_source()
download_source = p.get(pkg) or {}
return download_source.get("url")

def sdist_local_filename(self, pkg: str) -> str | None:
p = self.download_source()
download_source = p.get(pkg) or {}
return download_source.get("rename_to")
def package(self) -> dict[str, dict[str, str]]:
return self._return_value_or_default(self._data.get("packages"), {})

def download_source_url(self, pkg: str, default: str | None = None) -> str | None:
download_source = self._get_package_download_source_settings(pkg)
return self._return_value_or_default(download_source.get("url"), default)

def download_source_destination_filename(
self, pkg: str, default: str | None = None
) -> str | None:
download_source = self._get_package_download_source_settings(pkg)
return self._return_value_or_default(
download_source.get("destination_filename"), default
)

def resolver_sdist_server_url(
self, pkg: str, default: str | None = None
) -> str | None:
resolve_dist = self._get_package_resolver_dist_settings(pkg)
return self._return_value_or_default(
resolve_dist.get("sdist_server_url"), default
)

def resolver_include_wheels(
self, pkg: str, default: bool | None = None
) -> bool | None:
resolve_dist = self._get_package_resolver_dist_settings(pkg)
return self._return_value_or_default(
resolve_dist.get("include_wheels"), default
)

def resolver_include_sdist(
self, pkg: str, default: bool | None = None
) -> bool | None:
resolve_dist = self._get_package_resolver_dist_settings(pkg)
return self._return_value_or_default(resolve_dist.get("include_sdist"), default)

def _get_package_settings(self, pkg: str) -> dict[str, dict[str, str]]:
p = self.package()
return self._return_value_or_default(p.get(pkg), {})

def _get_package_download_source_settings(self, pkg: str) -> dict[str, str]:
p = self._get_package_settings(pkg)
return self._return_value_or_default(p.get("download_source"), {})

def _get_package_resolver_dist_settings(self, pkg: str) -> dict[str, str]:
p = self._get_package_settings(pkg)
return self._return_value_or_default(p.get("resolver_dist"), {})

def _return_value_or_default(self, value, default):
# can't use the "or" method since certain values can be false. Need to explicitly check for None
return value if value is not None else default


def _parse(content: str) -> Settings:
Expand Down
35 changes: 21 additions & 14 deletions src/fromager/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,23 +141,26 @@ def default_download_source(
sdist_server_url: str,
) -> tuple[pathlib.Path, str, str]:
"Download the requirement and return the name of the output path."
url_template = ctx.settings.sdist_download_url(req.name)
rename_to_template = ctx.settings.sdist_local_filename(req.name)

# don't include sdists if the user wants to download source from a predefined url
include_sdists = url_template is None
include_wheels = not include_sdists
include_sdists = ctx.settings.resolver_include_sdist(req.name, True)
include_wheels = ctx.settings.resolver_include_wheels(req.name, False)
override_sdist_server_url = ctx.settings.resolver_sdist_server_url(
req.name, sdist_server_url
)

org_url, version = resolve_dist(
ctx, req, sdist_server_url, include_sdists, include_wheels
ctx, req, override_sdist_server_url, include_sdists, include_wheels
)

url = _resolve_template(url_template, req, version) or org_url
dest_filename_template = ctx.settings.download_source_destination_filename(req.name)
destination_filename = _resolve_template(dest_filename_template, req, version)
url_template = ctx.settings.download_source_url(req.name, org_url)
url = _resolve_template(url_template, req, version)
logger.debug(f"{req.name}: using {url} instead of {org_url}")

rename_to = _resolve_template(rename_to_template, req, version)
source_filename = _download_source_check(
ctx.sdists_downloads, url, destination_filename
)

source_filename = _download_source_check(ctx.sdists_downloads, url, rename_to)
logger.debug(
f"{req.name}: have source for {req} version {version} in {source_filename}"
)
Expand All @@ -167,9 +170,9 @@ def default_download_source(
# Helper method to check whether .zip /.tar / .tgz is able to extract and check its content.
# It will throw exception if any other file is encountered. Eg: index.html
def _download_source_check(
destination_dir: pathlib.Path, url: str, rename_to: str | None = None
destination_dir: pathlib.Path, url: str, destination_filename: str | None = None
) -> str:
source_filename = download_url(destination_dir, url, rename_to)
source_filename = download_url(destination_dir, url, destination_filename)
if source_filename.suffix == ".zip":
source_file_contents = zipfile.ZipFile(source_filename).namelist()
if not source_file_contents:
Expand All @@ -187,9 +190,13 @@ def _download_source_check(


def download_url(
destination_dir: pathlib.Path, url: str, rename_to: str | None = None
destination_dir: pathlib.Path, url: str, destination_filename: str | None = None
) -> pathlib.Path:
basename = rename_to if rename_to else os.path.basename(urlparse(url).path)
basename = (
destination_filename
if destination_filename
else os.path.basename(urlparse(url).path)
)
outfile = pathlib.Path(destination_dir) / basename
logger.debug(
"looking for %s %s", outfile, "(exists)" if outfile.exists() else "(not there)"
Expand Down
10 changes: 5 additions & 5 deletions tests/test_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ def test_list_all(tmp_path: pathlib.Path):
project_env2 = variant_dir / "fromager_test.env"
project_env2.write_text("VAR1=VALUE1\nVAR2=VALUE2") # duplicate

download_source = {
"project-with-download-source": {"url": "url"},
"another-project-with-download-source": {"url": "url"},
settings = {
"project-with-download-source": {"download_source": {"url": "url"}},
"project-with-resolver-dist": {"resolver_dist": {"include_wheels": "true"}},
}

expected = [
Expand All @@ -137,14 +137,14 @@ def test_list_all(tmp_path: pathlib.Path):
"project-with-env",
"fromager-test",
"project-with-download-source",
"another-project-with-download-source",
"project-with-resolver-dist",
]
expected.sort()

packages = overrides.list_all(
patches_dir=patches_dir,
envs_dir=env_dir,
download_source=download_source,
settings=settings,
test=True,
)

Expand Down
54 changes: 44 additions & 10 deletions tests/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,57 @@ def test_with_pre_built():
def test_with_download_source():
s = settings._parse(
textwrap.dedent("""
download_source:
packages:
foo:
url: url
rename_to: rename
download_source:
url: url
destination_filename: rename
""")
)
assert s.sdist_local_filename("foo") == "rename"
assert s.sdist_local_filename("bar") is None
assert s.sdist_download_url("foo") == "url"
assert s.sdist_download_url("bar") is None
assert s.download_source_destination_filename("foo") == "rename"
assert s.download_source_destination_filename("bar") is None
assert s.download_source_url("foo") == "url"
assert s.download_source_url("bar") is None


def test_no_download_source():
s = settings._parse(
textwrap.dedent("""
pre_built:
packages:
""")
)
assert s.download_source_destination_filename("foo") is None
assert s.download_source_url("foo") is None


def test_with_resolver_dist():
s = settings._parse(
textwrap.dedent("""
packages:
foo:
resolver_dist:
sdist_server_url: url
include_sdist: true
include_wheels: false
""")
)
assert type(s.resolver_include_sdist("foo")) is bool
assert s.resolver_include_sdist("foo")
assert type(s.resolver_include_wheels("foo")) is bool
assert not s.resolver_include_wheels("foo")
assert s.resolver_sdist_server_url("foo") == "url"


def test_no_resolver_dist():
s = settings._parse(
textwrap.dedent("""
packages:
foo:
download_source:
url: url
destination_filename: rename
""")
)
assert s.sdist_local_filename("foo") is None
assert s.sdist_download_url("foo") is None
assert s.resolver_include_sdist("foo") is None
assert s.resolver_include_wheels("foo") is None
assert s.resolver_sdist_server_url("foo") is None
Loading

0 comments on commit c98eaf0

Please sign in to comment.