From e97a75b8e98acb656bac3107fcaae8cc8266786c Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 15:35:04 +0900
Subject: [PATCH 01/81] add some thoughts
---
python/private/bzlmod/minihub.bzl | 71 +++++++++++++++++++++++++++++++
1 file changed, 71 insertions(+)
create mode 100644 python/private/bzlmod/minihub.bzl
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
new file mode 100644
index 000000000..391a13fa3
--- /dev/null
+++ b/python/private/bzlmod/minihub.bzl
@@ -0,0 +1,71 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The overall design is:
+
+There is a single Pip hub repository, which creates the following repos:
+* `whl_index` that downloads the SimpleAPI page for a particular package
+ from the given indexes. It creates labels with URLs that can be used
+ to download things. Args:
+ * distribution - The name of the distribution.
+ * version - The version of the package.
+* `whl_archive` that downloads a particular wheel for a package, it accepts
+ the following args:
+ * sha256 - The sha256 to download.
+ * url - The url to use. Optional.
+ * url_file - The label that has the URL for downloading the wheel. Optional.
+ Mutually exclusive with the url arg.
+ * indexes - Indexes to query. Optional.
+* `whl_library` that extracts a particular wheel.
+
+This is created to make use of the parallelism that can be achieved if fetching
+is done in separate threads, one for each external repository.
+"""
+
+def whl_library(name, requirement, python_version, python_interpreter_target, **kwargs):
+ """Generate a number of third party repos for a particular wheel."""
+ indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
+ sha256s = requirement.split("--hash=sha256:")[1:]
+ distribution, _, version_and_tail = requirement.partition("==")
+ version, _, _ = version_and_tail.partition(" ")
+
+ # Defines targets:
+ # * whl - depending on the platform, return the correct whl defined in "name_sha.whl"
+ # * pkg - depending on the platform, return the correct py_library target in "name_sha"
+ # * dist_info - depending on the platform, return the correct py_library target in "name_sha"
+ # * data - depending on the platform, return the correct py_library target in "name_sha"
+ whl_index(
+ name = name,
+ sha256s = sha256s,
+ indexes = indexes,
+ version = version,
+ python_version = python_version, # used to get the right wheels
+ )
+
+ for sha256 in sha256s:
+ # We would use http_file, but we are passing the URL to use via a file,
+ # if the url is known (in case of using pdm lock), we could use an
+ # http_file.
+ whl_archive(
+ name = "{}_{}.whl".format(name, sha256),
+ distribution = distribution,
+ url_file = "{name}//:_{sha256}_url".format(name = name, sha256 = sha256),
+ )
+
+ _whl_library(
+ name = "{name}_{sha256}".format(name = name, sha256 = sha256),
+ file = "{name}_{sha256}//:whl".format(name = name, sha256 = sha256),
+ python_interpreter_target = python_interpreter_target,
+ **kwargs
+ )
From ea0f2ba98da3253b9e46704af7e9676bef0901f2 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:08:06 +0900
Subject: [PATCH 02/81] wip
---
python/pip_install/pip_repository.bzl | 4 +++
python/private/bzlmod/minihub.bzl | 51 +++++++++++++++++++++------
python/private/bzlmod/pip.bzl | 3 +-
3 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 07e3353c7..266519b98 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -799,6 +799,10 @@ whl_library_attrs = {
),
allow_files = True,
),
+ "file": attr.label(
+ doc = "The label of the whl file to use",
+ allow_single_file = True,
+ ),
"group_deps": attr.string_list(
doc = "List of dependencies to skip in order to break the cycles within a dependency group.",
default = [],
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 391a13fa3..2f91cfceb 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -32,25 +32,29 @@ There is a single Pip hub repository, which creates the following repos:
This is created to make use of the parallelism that can be achieved if fetching
is done in separate threads, one for each external repository.
"""
+load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
-def whl_library(name, requirement, python_version, python_interpreter_target, **kwargs):
- """Generate a number of third party repos for a particular wheel."""
+def whl_library(name, distribution, requirement, **kwargs):
+ """Generate a number of third party repos for a particular wheel.
+ """
indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
- sha256s = requirement.split("--hash=sha256:")[1:]
- distribution, _, version_and_tail = requirement.partition("==")
- version, _, _ = version_and_tail.partition(" ")
+ sha256s = [sha.strip() for sha in requirement.split("--hash=sha256:")[1:]]
# Defines targets:
# * whl - depending on the platform, return the correct whl defined in "name_sha.whl"
# * pkg - depending on the platform, return the correct py_library target in "name_sha"
# * dist_info - depending on the platform, return the correct py_library target in "name_sha"
# * data - depending on the platform, return the correct py_library target in "name_sha"
+ #
+ # Needs:
+ # * Select on the Python interpreter version
+ # * Select on the glibc/musllibc or ask the user to provide whether they want musllibc or glibc at init
+ # * Select on the platform
whl_index(
name = name,
+ distribution = distribution,
sha256s = sha256s,
indexes = indexes,
- version = version,
- python_version = python_version, # used to get the right wheels
)
for sha256 in sha256s:
@@ -59,13 +63,38 @@ def whl_library(name, requirement, python_version, python_interpreter_target, **
# http_file.
whl_archive(
name = "{}_{}.whl".format(name, sha256),
- distribution = distribution,
- url_file = "{name}//:_{sha256}_url".format(name = name, sha256 = sha256),
+ url_file = "@{name}//:_{sha256}_url".format(name = name, sha256 = sha256),
+ sha256 = sha256,
)
_whl_library(
name = "{name}_{sha256}".format(name = name, sha256 = sha256),
- file = "{name}_{sha256}//:whl".format(name = name, sha256 = sha256),
- python_interpreter_target = python_interpreter_target,
+ file = "@{name}_{sha256}//:whl".format(name = name, sha256 = sha256),
+ requirement = requirement, # do we need this?
**kwargs
)
+
+def _whl_index_impl(_rctx):
+ fail("TODO")
+
+whl_index = repository_rule(
+ attrs = {
+ "distribution": attr.string(mandatory=True),
+ "indexes": attr.string_list(mandatory=True),
+ "sha256s": attr.string_list(mandatory=True),
+ },
+ doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
+ implementation = _whl_index_impl,
+)
+
+def _whl_archive_impl(_rctx):
+ fail("TODO")
+
+whl_archive = repository_rule(
+ attrs = {
+ "sha256": attr.string(mandatory=False),
+ "url_file": attr.label(mandatory=True),
+ },
+ doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
+ implementation = _whl_archive_impl,
+)
diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl
index 305039fb2..0a24a8c04 100644
--- a/python/private/bzlmod/pip.bzl
+++ b/python/private/bzlmod/pip.bzl
@@ -22,13 +22,13 @@ load(
"locked_requirements_label",
"pip_repository_attrs",
"use_isolated",
- "whl_library",
)
load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
load("//python/private:full_version.bzl", "full_version")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:version_label.bzl", "version_label")
+load(":minihub.bzl", "whl_library")
load(":pip_repository.bzl", "pip_repository")
def _whl_mods_impl(mctx):
@@ -150,6 +150,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
whl_library(
name = "%s_%s" % (pip_name, whl_name),
+ distribution = whl_name,
requirement = requirement_line,
repo = pip_name,
repo_prefix = pip_name + "_",
From bc41145e3c1627a7d48d0f3673c7990650208773 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:25:59 +0900
Subject: [PATCH 03/81] continue to spike
---
python/private/bzlmod/minihub.bzl | 40 +++++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 2f91cfceb..b763a380a 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -74,8 +74,44 @@ def whl_library(name, distribution, requirement, **kwargs):
**kwargs
)
-def _whl_index_impl(_rctx):
- fail("TODO")
+def _whl_index_impl(rctx):
+ files = []
+ want_shas = {sha: True for sha in rctx.attr.sha256s}
+ for i, index_url in enumerate(rctx.attr.indexes):
+ html = "index-{}.html".format(i)
+ result = rctx.download(
+ url=index_url + "/" + rctx.attr.distribution,
+ output=html,
+ )
+ if not result.success:
+ fail(result)
+
+ contents = rctx.read(html)
+ _, _, hrefs = contents.partition("
Date: Sat, 9 Dec 2023 22:11:40 +0900
Subject: [PATCH 04/81] continue experimenting, the group libraries do not work
but I think it may be possible to get them working
---
python/pip_install/pip_repository.bzl | 31 +++--
python/private/bzlmod/minihub.bzl | 174 +++++++++++++++++++++++---
python/private/text_util.bzl | 13 +-
3 files changed, 187 insertions(+), 31 deletions(-)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 266519b98..76da7af65 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -683,18 +683,26 @@ def _whl_library_impl(rctx):
# Manually construct the PYTHONPATH since we cannot use the toolchain here
environment = _create_repository_execution_environment(rctx, python_interpreter)
- result = rctx.execute(
- args,
- environment = environment,
- quiet = rctx.attr.quiet,
- timeout = rctx.attr.timeout,
- )
- if result.return_code:
- fail("whl_library %s failed: %s (%s) error code: '%s'" % (rctx.attr.name, result.stdout, result.stderr, result.return_code))
- whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"])
- if not rctx.delete("whl_file.json"):
- fail("failed to delete the whl_file.json file")
+ whl_path = None
+ if rctx.attr.file:
+ whl_path = rctx.path(rctx.attr.file).realpath
+ if whl_path.basename.endswith("tar.gz"):
+ whl_path = None
+
+ if whl_path == None:
+ result = rctx.execute(
+ args,
+ environment = environment,
+ quiet = rctx.attr.quiet,
+ timeout = rctx.attr.timeout,
+ )
+ if result.return_code:
+ fail("whl_library %s failed: %s (%s) error code: '%s'" % (rctx.attr.name, result.stdout, result.stderr, result.return_code))
+
+ whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"])
+ if not rctx.delete("whl_file.json"):
+ fail("failed to delete the whl_file.json file")
if rctx.attr.whl_patches:
patches = {}
@@ -801,7 +809,6 @@ whl_library_attrs = {
),
"file": attr.label(
doc = "The label of the whl file to use",
- allow_single_file = True,
),
"group_deps": attr.string_list(
doc = "List of dependencies to skip in order to break the cycles within a dependency group.",
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index b763a380a..353a257b7 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -33,8 +33,61 @@ This is created to make use of the parallelism that can be achieved if fetching
is done in separate threads, one for each external repository.
"""
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
+load("//python/private:text_util.bzl", "render")
+load("//python/private:parse_whl_name.bzl", "parse_whl_name")
-def whl_library(name, distribution, requirement, **kwargs):
+_this = str(Label("//:unknown"))
+
+def _label(label):
+ """This function allows us to construct labels to pass to rules."""
+ prefix, _, _ = _this.partition("//")
+ prefix = prefix + "~pip~"
+ return Label(label.replace("@", prefix))
+
+_os_in_tag = {
+ "linux": "linux",
+ "manylinux": "linux",
+ "win": "windows",
+ "macosx": "osx",
+ "musllinux": "linux",
+}
+
+_cpu_in_tag = {
+ "amd64": "x86_64",
+ "x86_64": "x86_64",
+ "i686": "x86_32",
+ "i386": "x86_32",
+ "s390x": "s390x",
+ "ppc64le": "ppc",
+ "arm64": "aarch64",
+ "aarch64": "aarch64",
+ "win32": "x86_32",
+}
+
+def _parse_os_from_tag(platform_tag):
+ for prefix, os in _os_in_tag.items():
+ if platform_tag.startswith(prefix):
+ return os
+
+ fail("cannot get os from platform tag: {}".format(platform_tag))
+
+def _parse_cpu_from_tag(platform_tag):
+ if "universal2" in platform_tag:
+ return ("x86_64", "aarch64")
+
+ for suffix, cpu in _cpu_in_tag.items():
+ if platform_tag.endswith(suffix):
+ return (cpu,)
+
+ fail("cannot get cpu from platform tag: {}".format(platform_tag))
+
+def _parse_platform_tag(platform_tag):
+ os = _parse_os_from_tag(platform_tag)
+
+ cpu = _parse_cpu_from_tag(platform_tag)
+ return os, cpu
+
+def whl_library(name, distribution, requirement, repo, **kwargs):
"""Generate a number of third party repos for a particular wheel.
"""
indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
@@ -55,22 +108,26 @@ def whl_library(name, distribution, requirement, **kwargs):
distribution = distribution,
sha256s = sha256s,
indexes = indexes,
+ repo = repo,
)
for sha256 in sha256s:
+ whl_repo = "{}_{}_whl".format(name, sha256)
+
# We would use http_file, but we are passing the URL to use via a file,
# if the url is known (in case of using pdm lock), we could use an
# http_file.
whl_archive(
- name = "{}_{}.whl".format(name, sha256),
- url_file = "@{name}//:_{sha256}_url".format(name = name, sha256 = sha256),
+ name = whl_repo,
+ url_file = _label("@{}//urls:{}".format(name, sha256)),
sha256 = sha256,
)
_whl_library(
name = "{name}_{sha256}".format(name = name, sha256 = sha256),
- file = "@{name}_{sha256}//:whl".format(name = name, sha256 = sha256),
+ file = _label("@{}//:whl".format(whl_repo)),
requirement = requirement, # do we need this?
+ repo = repo,
**kwargs
)
@@ -87,6 +144,8 @@ def _whl_index_impl(rctx):
fail(result)
contents = rctx.read(html)
+ rctx.delete(html)
+
_, _, hrefs = contents.partition("
Date: Sun, 10 Dec 2023 11:51:19 +0900
Subject: [PATCH 05/81] hack through visibility to get tests almost passing
---
python/pip_install/pip_repository.bzl | 4 +-
.../generate_group_library_build_bazel.bzl | 4 +-
.../generate_whl_library_build_bazel.bzl | 8 ++-
python/private/bzlmod/minihub.bzl | 66 ++++++++++---------
4 files changed, 46 insertions(+), 36 deletions(-)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 76da7af65..72e62aa99 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -683,12 +683,14 @@ def _whl_library_impl(rctx):
# Manually construct the PYTHONPATH since we cannot use the toolchain here
environment = _create_repository_execution_environment(rctx, python_interpreter)
-
whl_path = None
if rctx.attr.file:
whl_path = rctx.path(rctx.attr.file).realpath
if whl_path.basename.endswith("tar.gz"):
whl_path = None
+ else:
+ rctx.symlink(whl_path, whl_path.basename)
+ whl_path = rctx.path(whl_path.basename)
if whl_path == None:
result = rctx.execute(
diff --git a/python/pip_install/private/generate_group_library_build_bazel.bzl b/python/pip_install/private/generate_group_library_build_bazel.bzl
index c122b0478..ccca8d8a2 100644
--- a/python/pip_install/private/generate_group_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_group_library_build_bazel.bzl
@@ -81,7 +81,9 @@ def _generate_group_libraries(repo_prefix, group_name, group_members):
whl_deps = repr(whl_file_deps),
lib_public_label = PY_LIBRARY_PUBLIC_LABEL,
lib_deps = repr(lib_dependencies),
- visibility = repr(visibility),
+ #visibility = repr(visibility),
+ # TODO @aignas 2023-12-10: fix this
+ visibility = repr(["//visibility:public"]),
)
def generate_group_library_build_bazel(
diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl
index 6d0f167f0..f5e5560d3 100644
--- a/python/pip_install/private/generate_whl_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl
@@ -210,12 +210,16 @@ def generate_whl_library_build_bazel(
group_repo = repo_prefix + "_groups"
library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), PY_LIBRARY_PUBLIC_LABEL)
whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), WHEEL_FILE_PUBLIC_LABEL)
- impl_vis = "@%s//:__pkg__" % (group_repo,)
+
+ # TODO @aignas 2023-12-10: fix this
+ impl_vis = "//visibility:public"
else:
library_impl_label = PY_LIBRARY_IMPL_LABEL
whl_impl_label = WHEEL_FILE_IMPL_LABEL
- impl_vis = "//visibility:private"
+
+ # TODO @aignas 2023-12-10: fix this
+ impl_vis = "//visibility:public"
contents = "\n".join(
[
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 353a257b7..6d22f264a 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -32,9 +32,10 @@ There is a single Pip hub repository, which creates the following repos:
This is created to make use of the parallelism that can be achieved if fetching
is done in separate threads, one for each external repository.
"""
+
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
-load("//python/private:text_util.bzl", "render")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
+load("//python/private:text_util.bzl", "render")
_this = str(Label("//:unknown"))
@@ -46,22 +47,22 @@ def _label(label):
_os_in_tag = {
"linux": "linux",
- "manylinux": "linux",
- "win": "windows",
"macosx": "osx",
+ "manylinux": "linux",
"musllinux": "linux",
+ "win": "windows",
}
_cpu_in_tag = {
+ "aarch64": "aarch64",
"amd64": "x86_64",
- "x86_64": "x86_64",
- "i686": "x86_32",
+ "arm64": "aarch64",
"i386": "x86_32",
- "s390x": "s390x",
+ "i686": "x86_32",
"ppc64le": "ppc",
- "arm64": "aarch64",
- "aarch64": "aarch64",
+ "s390x": "s390x",
"win32": "x86_32",
+ "x86_64": "x86_64",
}
def _parse_os_from_tag(platform_tag):
@@ -126,7 +127,7 @@ def whl_library(name, distribution, requirement, repo, **kwargs):
_whl_library(
name = "{name}_{sha256}".format(name = name, sha256 = sha256),
file = _label("@{}//:whl".format(whl_repo)),
- requirement = requirement, # do we need this?
+ requirement = requirement, # do we need this?
repo = repo,
**kwargs
)
@@ -137,8 +138,8 @@ def _whl_index_impl(rctx):
for i, index_url in enumerate(rctx.attr.indexes):
html = "index-{}.html".format(i)
result = rctx.download(
- url=index_url + "/" + rctx.attr.distribution,
- output=html,
+ url = index_url + "/" + rctx.attr.distribution,
+ output = html,
)
if not result.success:
fail(result)
@@ -155,21 +156,22 @@ def _whl_index_impl(rctx):
continue
files.append(struct(
- url=url,
- sha256=sha256,
+ url = url,
+ sha256 = sha256,
))
if not files:
fail("Could not find any files for: {}".format(rctx.attr.distribution))
for file in files:
- rctx.file("urls/{}".format(file.sha256), "{}\n".format(file.url))
+ contents = json.encode(file)
+ rctx.file("urls/{}".format(file.sha256), contents)
rctx.file("urls/BUILD.bazel", """exports_files(glob(["*"]), visibility={})""".format(
render.list([
"@@{}_{}_whl//:__pkg__".format(rctx.attr.name, file.sha256)
for file in files
- ])
+ ]),
))
abi = "cp" + rctx.attr.repo.rpartition("_")[2]
@@ -180,9 +182,9 @@ def _whl_index_impl(rctx):
select = {}
for file in files:
tmpl = "@{name}_{distribution}_{sha256}//:{{target}}".format(
- name=rctx.attr.repo,
- distribution=rctx.attr.distribution,
- sha256=file.sha256,
+ name = rctx.attr.repo,
+ distribution = rctx.attr.distribution,
+ sha256 = file.sha256,
)
_, _, filename = file.url.strip().rpartition("/")
@@ -212,7 +214,7 @@ config_setting(
"@platforms//os:{os}",
],
visibility = ["//visibility:private"],
-)""".format(platform=platform, cpu=cpu, os=os)
+)""".format(platform = platform, cpu = cpu, os = os)
if config_setting not in build_contents:
build_contents.append(config_setting)
@@ -221,21 +223,21 @@ config_setting(
build_contents += [
render.alias(
- name=target,
- actual=actual.format(target=target) if actual else render.select({k: v.format(target=target) for k, v in select.items()}),
- visibility=["//visibility:public"],
+ name = target,
+ actual = actual.format(target = target) if actual else render.select({k: v.format(target = target) for k, v in select.items()}),
+ visibility = ["//visibility:public"],
)
- for target in ["pkg", "whl", "data", "dist_info"]
+ for target in ["pkg", "whl", "data", "dist_info", "_whl", "_pkg"]
]
rctx.file("BUILD.bazel", "\n\n".join(build_contents))
whl_index = repository_rule(
attrs = {
- "distribution": attr.string(mandatory=True),
- "indexes": attr.string_list(mandatory=True),
- "repo": attr.string(mandatory=True),
- "sha256s": attr.string_list(mandatory=True),
+ "distribution": attr.string(mandatory = True),
+ "indexes": attr.string_list(mandatory = True),
+ "repo": attr.string(mandatory = True),
+ "sha256s": attr.string_list(mandatory = True),
},
doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
implementation = _whl_index_impl,
@@ -247,11 +249,11 @@ def _whl_archive_impl(rctx):
# TODO @aignas 2023-12-09: solve this without restarts
url_file = rctx.path(rctx.attr.url_file)
- url = rctx.read(url_file)
+ url = json.decode(rctx.read(url_file))["url"]
_, _, filename = url.rpartition("/")
filename = filename.strip()
- result = rctx.download(url, output=filename, sha256=rctx.attr.sha256)
+ result = rctx.download(url, output = filename, sha256 = rctx.attr.sha256)
if not result.success:
fail(result)
@@ -265,13 +267,13 @@ filegroup(
srcs=["{filename}"],
visibility=["//visibility:public"],
)
-""".format(filename=filename),
+""".format(filename = filename),
)
whl_archive = repository_rule(
attrs = {
- "sha256": attr.string(mandatory=False),
- "url_file": attr.label(mandatory=True),
+ "sha256": attr.string(mandatory = False),
+ "url_file": attr.label(mandatory = True),
},
doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
implementation = _whl_archive_impl,
From 3aa23a27a184726d39878e8765d13a1eeb8070ba Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sun, 10 Dec 2023 12:43:21 +0900
Subject: [PATCH 06/81] document analysis of the current approach and a new
approach
---
python/private/bzlmod/minihub.bzl | 51 +++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 6d22f264a..8c55c977b 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -14,6 +14,8 @@
"""The overall design is:
+# Attempt 1
+
There is a single Pip hub repository, which creates the following repos:
* `whl_index` that downloads the SimpleAPI page for a particular package
from the given indexes. It creates labels with URLs that can be used
@@ -31,6 +33,55 @@ There is a single Pip hub repository, which creates the following repos:
This is created to make use of the parallelism that can be achieved if fetching
is done in separate threads, one for each external repository.
+
+## Notes on the approach above
+
+Pros:
+* Really fast, no need to re-download the wheels when changing the contents of
+ `whl_library`.
+Cons:
+* The sha256 files in filenames makes things difficult to read/understand.
+* The cyclic dependency groups need extra work as the visibility between targets needs
+ to be ironed out.
+* The whl_annotations break, because users would need to specify weird repos in
+ their `use_repo` statements in the `MODULE.bazel` in order to make the
+ annotations useful. The need for forwarding the aliases based on the
+ annotations is real.
+* The index would be different for different lock files.
+
+# Approach 2
+
+* In case we use requirements:
+ * `pypi_metadata` spoke repo that exposes the following for each distribution name:
+ `metadata.json` - contains shas and filenames
+ * `pypi_metadata` hub repo that has aliases for all repos in one place,
+ helps with label generation/visibility.
+ * `whl_lock` hub repo that uses labels from `pypi_metadata` hub to generate a
+ single lock file: `lock.json`.
+* In case we use `pdm` or `poetry` or `hatch` lock files:
+ * `whl_lock` repo that translates the file into `lock.json`.
+* `pip.bzl` extension materializes the `whl_lock//:lock.json` file and defines the `whl_library` repos:
+ * For each whl name that we are interested in, create a `http_file` repo for the wheel.
+ * Generate a `whl_library` by passing a `file` argument to the `http_file`.
+ * If the whl is multi-platform - whl_library minihub does not need to be created.
+ * If the whl is platform-specific - whl_library minihub needs to be created.
+
+Pros:
+* Solves `sha256` not being in repo names
+* Lock format can be the same for all
+* We may include whl metadata in the lock which means that we may have the dep graph
+ before creating the `whl_libraries`. If we have that, we can generate the cyclic dependency groups procedurally.
+Cons:
+* cyclic dependency groups for platform-specific wheels need a different approach than
+ what we have today.
+* whl_annotations for platform-specific wheels could be worked around only in a subset
+ of cases. This is the analysis for each field:
+ * additive_build_content => What to do?
+ * copy_files => Apply to each platform-specific wheel and it will be OK and we will need to generate aliases for them in the minihub.
+ * copy_executables => Apply to each platform-specific wheel and it will be OK and we will need to generate aliases for them in the minihub.
+ * data => Apply to each platform-specific wheel and it will be OK.
+ * data_exclude_glob => Apply to each platform-specific wheel and it will be OK.
+ * srcs_exclude_glob => Apply to each platform-specific wheel and it will be OK.
"""
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
From 1e9f6fd29eec81cb2eb6496d6ed4f9dae46e6453 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Sun, 10 Dec 2023 12:43:33 +0900
Subject: [PATCH 07/81] start implementing a new approach
---
python/private/bzlmod/pypi_metadata.bzl | 59 +++++++++++++++++++++++++
1 file changed, 59 insertions(+)
create mode 100644 python/private/bzlmod/pypi_metadata.bzl
diff --git a/python/private/bzlmod/pypi_metadata.bzl b/python/private/bzlmod/pypi_metadata.bzl
new file mode 100644
index 000000000..1d1ae0bc5
--- /dev/null
+++ b/python/private/bzlmod/pypi_metadata.bzl
@@ -0,0 +1,59 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""PyPI metadata hub and spoke repos"""
+
+def whl_lock(name, *requirements, **kwargs):
+ indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
+
+ sha_by_pkg = {}
+ for requirement in requirements:
+ sha256s = [sha.strip() for sha in requirement.split("--hash=sha256:")[1:]]
+ distribution, _, _ = requirement.partition("==")
+ distribution, _, _ = distribution.partition("[")
+
+ if distribution not in sha_by_pkg:
+ sha_by_pkg[distribution] = {}
+
+ for sha in sha256s:
+ sha_by_pkg[distribution][sha] = True
+
+ pass
+
+def _whl_lock_impl(rctx):
+ fail("TODO")
+
+_whl_lock = repository_rule(
+ attrs = {
+ },
+ implementation = _whl_lock_impl,
+)
+
+def _pypi_metadata_impl(rctx):
+ fail("TODO")
+
+pypi_metadata = repository_rule(
+ attrs = {
+ },
+ implementation = _pypi_metadata_impl,
+)
+
+def _pypi_distribution_metadata_impl(rctx):
+ fail("TODO")
+
+pypi_distribution_metadata = repository_rule(
+ attrs = {
+ },
+ implementation = _pypi_distribution_metadata_impl,
+)
From d78f2822efe1990a37f9156e726fbd3cdd7cdb73 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Mon, 11 Dec 2023 10:08:01 +0900
Subject: [PATCH 08/81] wip
---
examples/bzlmod/MODULE.bazel | 2 +-
python/private/bzlmod/label.bzl | 23 ++++++
python/private/bzlmod/minihub.bzl | 45 +++---------
python/private/bzlmod/pip.bzl | 33 ++++++++-
python/private/bzlmod/pypi_metadata.bzl | 98 +++++++++++++++++++++++--
5 files changed, 159 insertions(+), 42 deletions(-)
create mode 100644 python/private/bzlmod/label.bzl
diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel
index 44d686e3d..a2084639e 100644
--- a/examples/bzlmod/MODULE.bazel
+++ b/examples/bzlmod/MODULE.bazel
@@ -79,7 +79,7 @@ pip.whl_mods(
hub_name = "whl_mods_hub",
whl_name = "wheel",
)
-use_repo(pip, "whl_mods_hub")
+use_repo(pip, "whl_mods_hub", "whl_lock")
# To fetch pip dependencies, use pip.parse. We can pass in various options,
# but typically we pass requirements and the Python version. The Python
diff --git a/python/private/bzlmod/label.bzl b/python/private/bzlmod/label.bzl
new file mode 100644
index 000000000..eca3c7808
--- /dev/null
+++ b/python/private/bzlmod/label.bzl
@@ -0,0 +1,23 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"A small utility to create a label usable internally"
+
+_this = str(Label("//:unknown"))
+
+def label(label):
+ """This function allows us to construct labels to pass to rules."""
+ prefix, _, _ = _this.partition("//")
+ prefix = prefix + "~pip~"
+ return Label(label.replace("@", prefix))
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 8c55c977b..7b5aa445a 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -82,19 +82,19 @@ Cons:
* data => Apply to each platform-specific wheel and it will be OK.
* data_exclude_glob => Apply to each platform-specific wheel and it will be OK.
* srcs_exclude_glob => Apply to each platform-specific wheel and it will be OK.
+
+## Notes on this approach
+
+* We need to define the `whl_lock` and related repos in a separate bzlmod
+ extension. This is undesirable because an extra extension increases the
+ API scope.
+
"""
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:text_util.bzl", "render")
-
-_this = str(Label("//:unknown"))
-
-def _label(label):
- """This function allows us to construct labels to pass to rules."""
- prefix, _, _ = _this.partition("//")
- prefix = prefix + "~pip~"
- return Label(label.replace("@", prefix))
+load(":label.bzl", _label = "label")
_os_in_tag = {
"linux": "linux",
@@ -139,32 +139,11 @@ def _parse_platform_tag(platform_tag):
cpu = _parse_cpu_from_tag(platform_tag)
return os, cpu
-def whl_library(name, distribution, requirement, repo, **kwargs):
+def whl_library(name, metadata, **kwargs):
"""Generate a number of third party repos for a particular wheel.
"""
- indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
- sha256s = [sha.strip() for sha in requirement.split("--hash=sha256:")[1:]]
-
- # Defines targets:
- # * whl - depending on the platform, return the correct whl defined in "name_sha.whl"
- # * pkg - depending on the platform, return the correct py_library target in "name_sha"
- # * dist_info - depending on the platform, return the correct py_library target in "name_sha"
- # * data - depending on the platform, return the correct py_library target in "name_sha"
- #
- # Needs:
- # * Select on the Python interpreter version
- # * Select on the glibc/musllibc or ask the user to provide whether they want musllibc or glibc at init
- # * Select on the platform
- whl_index(
- name = name,
- distribution = distribution,
- sha256s = sha256s,
- indexes = indexes,
- repo = repo,
- )
-
- for sha256 in sha256s:
- whl_repo = "{}_{}_whl".format(name, sha256)
+ for filename, sha256 in metadata.items():
+ whl_repo = "{}_{}_whl".format(name, filename)
# We would use http_file, but we are passing the URL to use via a file,
# if the url is known (in case of using pdm lock), we could use an
@@ -178,8 +157,6 @@ def whl_library(name, distribution, requirement, repo, **kwargs):
_whl_library(
name = "{name}_{sha256}".format(name = name, sha256 = sha256),
file = _label("@{}//:whl".format(whl_repo)),
- requirement = requirement, # do we need this?
- repo = repo,
**kwargs
)
diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl
index 0a24a8c04..3e3b98172 100644
--- a/python/private/bzlmod/pip.bzl
+++ b/python/private/bzlmod/pip.bzl
@@ -22,13 +22,16 @@ load(
"locked_requirements_label",
"pip_repository_attrs",
"use_isolated",
+ "whl_library",
)
load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
load("//python/private:full_version.bzl", "full_version")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:version_label.bzl", "version_label")
-load(":minihub.bzl", "whl_library")
+#load(":minihub.bzl", "whl_library")
+load(":pypi_metadata.bzl", "whl_lock")
+load(":label.bzl", "label")
load(":pip_repository.bzl", "pip_repository")
def _whl_mods_impl(mctx):
@@ -150,7 +153,6 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
whl_library(
name = "%s_%s" % (pip_name, whl_name),
- distribution = whl_name,
requirement = requirement_line,
repo = pip_name,
repo_prefix = pip_name + "_",
@@ -250,6 +252,7 @@ def _pip_impl(module_ctx):
_overriden_whl_set = {}
whl_overrides = {}
+ all_requirements = []
for module in module_ctx.modules:
for attr in module.tags.override:
if not module.is_root:
@@ -276,6 +279,32 @@ def _pip_impl(module_ctx):
whl_overrides[whl_name][patch].whls.append(attr.file)
+ all_requirements = []
+ for module in module_ctx.modules:
+ for pip_attr in module.tags.parse:
+ for requirements_lock in [
+ pip_attr.requirements_lock,
+ pip_attr.requirements_linux,
+ pip_attr.requirements_darwin,
+ pip_attr.requirements_windows,
+ ]:
+ if not requirements_lock:
+ continue
+
+ requirements_lock_content = module_ctx.read(requirements_lock)
+ parse_result = parse_requirements(requirements_lock_content)
+ requirements = parse_result.requirements
+ all_requirements.extend([line for _, line in requirements])
+
+ whl_lock(
+ name = "whl_lock",
+ requirements = all_requirements,
+ #indexes = kwargs.get("indexes"),
+ )
+
+ #lock_path = module_ctx.path(label("@whl_lock//:lock.json"))
+ #fail(lock_path)
+
# Used to track all the different pip hubs and the spoke pip Python
# versions.
pip_hub_map = {}
diff --git a/python/private/bzlmod/pypi_metadata.bzl b/python/private/bzlmod/pypi_metadata.bzl
index 1d1ae0bc5..4c16b40a9 100644
--- a/python/private/bzlmod/pypi_metadata.bzl
+++ b/python/private/bzlmod/pypi_metadata.bzl
@@ -14,7 +14,16 @@
"""PyPI metadata hub and spoke repos"""
-def whl_lock(name, *requirements, **kwargs):
+load("//python/private:normalize_name.bzl", "normalize_name")
+load(":label.bzl", "label")
+load("//python/private:text_util.bzl", "render")
+
+whl_lock = module_extension(
+ implementation = _pip_impl,
+ tag_classes = {
+)
+
+def whl_lock(name, requirements, **kwargs):
indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
sha_by_pkg = {}
@@ -22,6 +31,7 @@ def whl_lock(name, *requirements, **kwargs):
sha256s = [sha.strip() for sha in requirement.split("--hash=sha256:")[1:]]
distribution, _, _ = requirement.partition("==")
distribution, _, _ = distribution.partition("[")
+ distribution = normalize_name(distribution)
if distribution not in sha_by_pkg:
sha_by_pkg[distribution] = {}
@@ -29,31 +39,109 @@ def whl_lock(name, *requirements, **kwargs):
for sha in sha256s:
sha_by_pkg[distribution][sha] = True
- pass
+ # TODO @aignas 2023-12-10: make this global across all hub repos
+ for distribution, shas in sha_by_pkg.items():
+ pypi_distribution_metadata(
+ name="{}_{}_metadata".format(name, distribution),
+ distribution=distribution,
+ sha256s=shas,
+ indexes=indexes,
+ )
+
+ pypi_metadata(
+ name="{}_metadata".format(name),
+ distributions=sha_by_pkg.keys(),
+ )
+
+ _whl_lock(
+ name = name,
+ srcs = [
+ label("@{}_{}_metadata//:metadata.json".format(name, distribution))
+ for distribution in sha_by_pkg
+ ],
+ )
def _whl_lock_impl(rctx):
- fail("TODO")
+ lock = {}
+ for src in rctx.attr.srcs:
+ contents = json.decode(rctx.read(src))
+
+ _, _, distribution = str(src).partition(rctx.attr.name)
+ distribution, _, _ = distribution.rpartition("_metadata")
+ distribution = distribution.strip("_")
+ lock[distribution] = contents
+
+ rctx.file("lock.json", json.encode(lock))
+ rctx.file("BUILD.bazel", """exports_files(["lock.json"], visibility=["//visibility:public"])""")
+
_whl_lock = repository_rule(
attrs = {
+ "srcs": attr.label_list(),
},
implementation = _whl_lock_impl,
)
def _pypi_metadata_impl(rctx):
- fail("TODO")
+ aliases = {
+ distribution: "@@{}_{}_metadata//:metadata.json".format(
+ rctx.name.replace("_metadata", ""),
+ distribution,
+ )
+ for distribution in rctx.attr.distributions
+ }
+ build_contents = [
+ render.alias(name=name, actual=actual, visibility=["//visibility:public"])
+ for name, actual in aliases.items()
+ ]
+ rctx.file("BUILD.bazel", "\n\n".join(build_contents))
pypi_metadata = repository_rule(
attrs = {
+ "distributions": attr.string_list(),
},
implementation = _pypi_metadata_impl,
)
def _pypi_distribution_metadata_impl(rctx):
- fail("TODO")
+ files = []
+ want_shas = {sha: True for sha in rctx.attr.sha256s}
+ for i, index_url in enumerate(rctx.attr.indexes):
+ html = "index-{}.html".format(i)
+ result = rctx.download(
+ url = index_url + "/" + rctx.attr.distribution,
+ output = html,
+ )
+ if not result.success:
+ fail(result)
+
+ contents = rctx.read(html)
+ rctx.delete(html)
+
+ _, _, hrefs = contents.partition("
Date: Mon, 11 Dec 2023 17:33:00 +0900
Subject: [PATCH 09/81] work out the visibility and cleanup
---
examples/bzlmod/MODULE.bazel | 2 +-
.../generate_whl_library_build_bazel.bzl | 12 +-
python/private/bzlmod/minihub.bzl | 125 +++++++++---------
python/private/bzlmod/pip.bzl | 7 +-
python/private/bzlmod/pypi_metadata.bzl | 78 ++---------
5 files changed, 82 insertions(+), 142 deletions(-)
diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel
index a2084639e..44d686e3d 100644
--- a/examples/bzlmod/MODULE.bazel
+++ b/examples/bzlmod/MODULE.bazel
@@ -79,7 +79,7 @@ pip.whl_mods(
hub_name = "whl_mods_hub",
whl_name = "wheel",
)
-use_repo(pip, "whl_mods_hub", "whl_lock")
+use_repo(pip, "whl_mods_hub")
# To fetch pip dependencies, use pip.parse. We can pass in various options,
# but typically we pass requirements and the Python version. The Python
diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl
index f5e5560d3..d81823c5e 100644
--- a/python/pip_install/private/generate_whl_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl
@@ -25,6 +25,7 @@ load(
"WHEEL_FILE_PUBLIC_LABEL",
)
load("//python/private:normalize_name.bzl", "normalize_name")
+load("//python/private:parse_whl_name.bzl", "parse_whl_name")
_COPY_FILE_TEMPLATE = """\
copy_file(
@@ -210,16 +211,15 @@ def generate_whl_library_build_bazel(
group_repo = repo_prefix + "_groups"
library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), PY_LIBRARY_PUBLIC_LABEL)
whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), WHEEL_FILE_PUBLIC_LABEL)
-
- # TODO @aignas 2023-12-10: fix this
- impl_vis = "//visibility:public"
+ impl_vis = "@{}{}//:__pkg__".format(
+ repo_prefix,
+ normalize_name(parse_whl_name(whl_name).distribution),
+ )
else:
library_impl_label = PY_LIBRARY_IMPL_LABEL
whl_impl_label = WHEEL_FILE_IMPL_LABEL
-
- # TODO @aignas 2023-12-10: fix this
- impl_vis = "//visibility:public"
+ impl_vis = "//visibility:private"
contents = "\n".join(
[
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 7b5aa445a..c09c668c3 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -92,6 +92,7 @@ Cons:
"""
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
+load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:text_util.bzl", "render")
load(":label.bzl", _label = "label")
@@ -139,89 +140,75 @@ def _parse_platform_tag(platform_tag):
cpu = _parse_cpu_from_tag(platform_tag)
return os, cpu
-def whl_library(name, metadata, **kwargs):
+def whl_library(name, requirement, **kwargs):
"""Generate a number of third party repos for a particular wheel.
"""
- for filename, sha256 in metadata.items():
- whl_repo = "{}_{}_whl".format(name, filename)
+ sha256s = [sha.strip() for sha in requirement.split("--hash=sha256:")[1:]]
+
+ distribution, _, _ = requirement.partition("==")
+ distribution, _, _ = distribution.partition("[")
+ distribution = normalize_name(distribution)
+
+ metadata = _label("@{}_metadata//:files.json".format(distribution))
+
+ whl_minihub(
+ name = name,
+ repo = kwargs.get("repo"),
+ distribution = distribution,
+ sha256s = sha256s,
+ metadata = metadata,
+ )
+
+ for sha256 in sha256s:
+ whl_name = "{}_{}".format(name, sha256[:6])
# We would use http_file, but we are passing the URL to use via a file,
# if the url is known (in case of using pdm lock), we could use an
# http_file.
whl_archive(
- name = whl_repo,
- url_file = _label("@{}//urls:{}".format(name, sha256)),
+ name = whl_name + "_whl",
+ metadata = metadata,
sha256 = sha256,
)
_whl_library(
- name = "{name}_{sha256}".format(name = name, sha256 = sha256),
- file = _label("@{}//:whl".format(whl_repo)),
+ name = whl_name,
+ file = _label("@{}_whl//:whl".format(whl_name)),
+ requirement = requirement,
**kwargs
)
-def _whl_index_impl(rctx):
- files = []
- want_shas = {sha: True for sha in rctx.attr.sha256s}
- for i, index_url in enumerate(rctx.attr.indexes):
- html = "index-{}.html".format(i)
- result = rctx.download(
- url = index_url + "/" + rctx.attr.distribution,
- output = html,
- )
- if not result.success:
- fail(result)
-
- contents = rctx.read(html)
- rctx.delete(html)
-
- _, _, hrefs = contents.partition("
Date: Mon, 11 Dec 2023 17:45:05 +0900
Subject: [PATCH 10/81] wip
---
.../generate_group_library_build_bazel.bzl | 6 ++---
python/private/bzlmod/minihub.bzl | 25 ++++++++++++++++++-
python/private/bzlmod/pypi_metadata.bzl | 1 -
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/python/pip_install/private/generate_group_library_build_bazel.bzl b/python/pip_install/private/generate_group_library_build_bazel.bzl
index ccca8d8a2..e77613e73 100644
--- a/python/pip_install/private/generate_group_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_group_library_build_bazel.bzl
@@ -74,6 +74,8 @@ def _generate_group_libraries(repo_prefix, group_name, group_members):
"@%s%s//:__pkg__" % (repo_prefix, normalize_name(d))
for d in group_members
]
+ # TODO @aignas 2023-12-10: fix this
+ visibility = ["//visibility:public"]
return _GROUP_TEMPLATE.format(
name = normalize_name(group_name),
@@ -81,9 +83,7 @@ def _generate_group_libraries(repo_prefix, group_name, group_members):
whl_deps = repr(whl_file_deps),
lib_public_label = PY_LIBRARY_PUBLIC_LABEL,
lib_deps = repr(lib_dependencies),
- #visibility = repr(visibility),
- # TODO @aignas 2023-12-10: fix this
- visibility = repr(["//visibility:public"]),
+ visibility = repr(visibility),
)
def generate_group_library_build_bazel(
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index c09c668c3..195b3158f 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -245,6 +245,30 @@ config_setting(
for target in ["pkg", "whl", "data", "dist_info"]
]
+ # The overall architecture:
+ # * `whl_library_for_a_whl should generate only the private targets
+ # * `whl_minihub` should do the `group` to `private` indirection as needed.
+ #
+ # then the group visibility settings remain the same.
+ # then we can also set the private target visibility to something else than public
+ # e.g. the _sha265 targets can only be accessed by the minihub
+
+ # TODO @aignas 2023-12-11: the code here should be doing this
+ #
+ # if group_name:
+ # group_repo = repo_prefix + "_groups"
+ # library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), PY_LIBRARY_PUBLIC_LABEL)
+ # whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), WHEEL_FILE_PUBLIC_LABEL)
+ # impl_vis = "@{}{}//:__pkg__".format(
+ # repo_prefix,
+ # normalize_name(parse_whl_name(whl_name).distribution),
+ # )
+
+ # else:
+ # library_impl_label = PY_LIBRARY_IMPL_LABEL
+ # whl_impl_label = WHEEL_FILE_IMPL_LABEL
+ # impl_vis = "//visibility:private"
+
build_contents += [
render.alias(
name = target,
@@ -271,7 +295,6 @@ def _whl_archive_impl(rctx):
prefix, _, _ = rctx.attr.name.rpartition("_")
prefix, _, _ = prefix.rpartition("_")
- # TODO @aignas 2023-12-09: solve this without restarts
metadata = rctx.path(rctx.attr.metadata)
files = json.decode(rctx.read(metadata))
sha256 = rctx.attr.sha256
diff --git a/python/private/bzlmod/pypi_metadata.bzl b/python/private/bzlmod/pypi_metadata.bzl
index 8dfd2936d..79a48d9fd 100644
--- a/python/private/bzlmod/pypi_metadata.bzl
+++ b/python/private/bzlmod/pypi_metadata.bzl
@@ -32,7 +32,6 @@ def whl_lock(requirements, **kwargs):
for sha in sha256s:
sha_by_pkg[distribution][sha] = True
- # TODO @aignas 2023-12-10: make this global across all hub repos
for distribution, shas in sha_by_pkg.items():
pypi_distribution_metadata(
name = "{}_metadata".format(distribution),
From 75be4c720613000cf016d6296d44e5832e0fd7a3 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:21:47 +0900
Subject: [PATCH 11/81] fix visibility of the group
---
.../generate_group_library_build_bazel.bzl | 2 -
.../generate_whl_library_build_bazel.bzl | 10 ++--
python/private/bzlmod/minihub.bzl | 57 ++++++++++---------
3 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/python/pip_install/private/generate_group_library_build_bazel.bzl b/python/pip_install/private/generate_group_library_build_bazel.bzl
index e77613e73..c122b0478 100644
--- a/python/pip_install/private/generate_group_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_group_library_build_bazel.bzl
@@ -74,8 +74,6 @@ def _generate_group_libraries(repo_prefix, group_name, group_members):
"@%s%s//:__pkg__" % (repo_prefix, normalize_name(d))
for d in group_members
]
- # TODO @aignas 2023-12-10: fix this
- visibility = ["//visibility:public"]
return _GROUP_TEMPLATE.format(
name = normalize_name(group_name),
diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl
index d81823c5e..4c71fba84 100644
--- a/python/pip_install/private/generate_whl_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl
@@ -211,15 +211,10 @@ def generate_whl_library_build_bazel(
group_repo = repo_prefix + "_groups"
library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), PY_LIBRARY_PUBLIC_LABEL)
whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), WHEEL_FILE_PUBLIC_LABEL)
- impl_vis = "@{}{}//:__pkg__".format(
- repo_prefix,
- normalize_name(parse_whl_name(whl_name).distribution),
- )
else:
library_impl_label = PY_LIBRARY_IMPL_LABEL
whl_impl_label = WHEEL_FILE_IMPL_LABEL
- impl_vis = "//visibility:private"
contents = "\n".join(
[
@@ -240,7 +235,10 @@ def generate_whl_library_build_bazel(
entry_point_prefix = WHEEL_ENTRY_POINT_PREFIX,
srcs_exclude = repr(srcs_exclude),
data = repr(data),
- impl_vis = repr([impl_vis]),
+ impl_vis = repr(["@{}{}//:__pkg__".format(
+ repo_prefix,
+ normalize_name(parse_whl_name(whl_name).distribution),
+ )]),
),
] + additional_content,
)
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 195b3158f..8ef694fe4 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -154,6 +154,7 @@ def whl_library(name, requirement, **kwargs):
whl_minihub(
name = name,
repo = kwargs.get("repo"),
+ group_name = kwargs.get("group_name"),
distribution = distribution,
sha256s = sha256s,
metadata = metadata,
@@ -236,15 +237,6 @@ config_setting(
if len(select) == 1 and "//conditions:default" in select:
actual = repr(select["//conditions:default"])
- build_contents += [
- render.alias(
- name = target,
- actual = actual.format(target = target) if actual else render.select({k: v.format(target = target) for k, v in select.items()}),
- visibility = ["//visibility:public"],
- )
- for target in ["pkg", "whl", "data", "dist_info"]
- ]
-
# The overall architecture:
# * `whl_library_for_a_whl should generate only the private targets
# * `whl_minihub` should do the `group` to `private` indirection as needed.
@@ -253,29 +245,41 @@ config_setting(
# then we can also set the private target visibility to something else than public
# e.g. the _sha265 targets can only be accessed by the minihub
- # TODO @aignas 2023-12-11: the code here should be doing this
- #
- # if group_name:
- # group_repo = repo_prefix + "_groups"
- # library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), PY_LIBRARY_PUBLIC_LABEL)
- # whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), WHEEL_FILE_PUBLIC_LABEL)
- # impl_vis = "@{}{}//:__pkg__".format(
- # repo_prefix,
- # normalize_name(parse_whl_name(whl_name).distribution),
- # )
-
- # else:
- # library_impl_label = PY_LIBRARY_IMPL_LABEL
- # whl_impl_label = WHEEL_FILE_IMPL_LABEL
- # impl_vis = "//visibility:private"
+ group_name = rctx.attr.group_name
+ if group_name:
+ group_repo = rctx.attr.repo + "__groups"
+ impl_vis = "@{}//:__pkg__".format(group_repo)
+ library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), "pkg")
+ whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), "whl")
+ else:
+ library_impl_label = "_pkg"
+ whl_impl_label = "_whl"
+ impl_vis = "//visibility:private"
build_contents += [
render.alias(
name = target,
actual = actual.format(target = target) if actual else render.select({k: v.format(target = target) for k, v in select.items()}),
- visibility = ["@{}__groups//:__pkg__".format(rctx.attr.repo)],
+ visibility = [visibility],
+ )
+ for target, visibility in {
+ "data": "//visibility:public",
+ "dist_info": "//visibility:public",
+ "_pkg": impl_vis,
+ "_whl": impl_vis,
+ }.items()
+ ]
+
+ build_contents += [
+ render.alias(
+ name = target,
+ actual = repr(actual),
+ visibility = ["//visibility:public"],
)
- for target in ["_whl", "_pkg"]
+ for target, actual in {
+ "pkg": library_impl_label,
+ "whl": whl_impl_label,
+ }.items()
]
rctx.file("BUILD.bazel", "\n\n".join(build_contents))
@@ -283,6 +287,7 @@ config_setting(
whl_minihub = repository_rule(
attrs = {
"distribution": attr.string(mandatory = True),
+ "group_name": attr.string(),
"metadata": attr.label(mandatory = True, allow_single_file = True),
"repo": attr.string(mandatory = True),
"sha256s": attr.string_list(mandatory = True),
From c01e65ae2346ab7d95cf232176f657b7c548720a Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:31:47 +0900
Subject: [PATCH 12/81] use constants instead of strings for labels
---
.../generate_whl_library_build_bazel.bzl | 35 +++----------------
python/private/bzlmod/minihub.bzl | 25 ++++++++-----
2 files changed, 21 insertions(+), 39 deletions(-)
diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl
index 4c71fba84..9b19fbe61 100644
--- a/python/pip_install/private/generate_whl_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl
@@ -64,14 +64,14 @@ filegroup(
)
filegroup(
- name = "{whl_file_impl_label}",
+ name = "{whl_file_label}",
srcs = ["{whl_name}"],
data = {whl_file_deps},
visibility = {impl_vis},
)
py_library(
- name = "{py_library_impl_label}",
+ name = "{py_library_label}",
srcs = glob(
["site-packages/**/*.py"],
exclude={srcs_exclude},
@@ -90,16 +90,6 @@ py_library(
tags = {tags},
visibility = {impl_vis},
)
-
-alias(
- name = "{py_library_public_label}",
- actual = "{py_library_actual_label}",
-)
-
-alias(
- name = "{whl_file_public_label}",
- actual = "{whl_file_actual_label}",
-)
"""
def generate_whl_library_build_bazel(
@@ -203,31 +193,14 @@ def generate_whl_library_build_bazel(
for d in non_group_deps
]
- # If this library is a member of a group, its public label aliases need to
- # point to the group implementation rule not the implementation rules. We
- # also need to mark the implementation rules as visible to the group
- # implementation.
- if group_name:
- group_repo = repo_prefix + "_groups"
- library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), PY_LIBRARY_PUBLIC_LABEL)
- whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), WHEEL_FILE_PUBLIC_LABEL)
-
- else:
- library_impl_label = PY_LIBRARY_IMPL_LABEL
- whl_impl_label = WHEEL_FILE_IMPL_LABEL
-
contents = "\n".join(
[
_BUILD_TEMPLATE.format(
- py_library_public_label = PY_LIBRARY_PUBLIC_LABEL,
- py_library_impl_label = PY_LIBRARY_IMPL_LABEL,
- py_library_actual_label = library_impl_label,
+ py_library_label = PY_LIBRARY_IMPL_LABEL,
dependencies = repr(lib_dependencies),
data_exclude = repr(_data_exclude),
whl_name = whl_name,
- whl_file_public_label = WHEEL_FILE_PUBLIC_LABEL,
- whl_file_impl_label = WHEEL_FILE_IMPL_LABEL,
- whl_file_actual_label = whl_impl_label,
+ whl_file_label = WHEEL_FILE_IMPL_LABEL,
whl_file_deps = repr(whl_file_deps),
tags = repr(tags),
data_label = DATA_LABEL,
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 8ef694fe4..ae9090976 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -92,6 +92,15 @@ Cons:
"""
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
+load(
+ "//python/private:labels.bzl",
+ "DATA_LABEL",
+ "DIST_INFO_LABEL",
+ "PY_LIBRARY_IMPL_LABEL",
+ "PY_LIBRARY_PUBLIC_LABEL",
+ "WHEEL_FILE_IMPL_LABEL",
+ "WHEEL_FILE_PUBLIC_LABEL",
+)
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:text_util.bzl", "render")
@@ -252,8 +261,8 @@ config_setting(
library_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), "pkg")
whl_impl_label = "@%s//:%s_%s" % (group_repo, normalize_name(group_name), "whl")
else:
- library_impl_label = "_pkg"
- whl_impl_label = "_whl"
+ library_impl_label = PY_LIBRARY_IMPL_LABEL
+ whl_impl_label = WHEEL_FILE_IMPL_LABEL
impl_vis = "//visibility:private"
build_contents += [
@@ -263,10 +272,10 @@ config_setting(
visibility = [visibility],
)
for target, visibility in {
- "data": "//visibility:public",
- "dist_info": "//visibility:public",
- "_pkg": impl_vis,
- "_whl": impl_vis,
+ DATA_LABEL: "//visibility:public",
+ DIST_INFO_LABEL: "//visibility:public",
+ PY_LIBRARY_IMPL_LABEL: impl_vis,
+ WHEEL_FILE_IMPL_LABEL: impl_vis,
}.items()
]
@@ -277,8 +286,8 @@ config_setting(
visibility = ["//visibility:public"],
)
for target, actual in {
- "pkg": library_impl_label,
- "whl": whl_impl_label,
+ PY_LIBRARY_PUBLIC_LABEL: library_impl_label,
+ WHEEL_FILE_PUBLIC_LABEL: whl_impl_label,
}.items()
]
From 5b637cb383ff0ed834cdda0194168a2d45b79ef6 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:43:13 +0900
Subject: [PATCH 13/81] do not symlink the whl and just pass along the label
---
examples/bzlmod/whl_mods/appended_build_content.BUILD | 6 +-----
python/pip_install/pip_repository.bzl | 11 ++++++-----
.../private/generate_whl_library_build_bazel.bzl | 4 ++--
3 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/examples/bzlmod/whl_mods/appended_build_content.BUILD b/examples/bzlmod/whl_mods/appended_build_content.BUILD
index 0ca118d7b..9d9f2cd2b 100644
--- a/examples/bzlmod/whl_mods/appended_build_content.BUILD
+++ b/examples/bzlmod/whl_mods/appended_build_content.BUILD
@@ -8,9 +8,5 @@ write_file(
filegroup(
name = "whl_orig",
- srcs = glob(
- ["*.whl"],
- allow_empty = False,
- exclude = ["*-patched-*.whl"],
- ),
+ srcs = ["_whl"],
)
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 72e62aa99..2e90202cb 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -684,13 +684,13 @@ def _whl_library_impl(rctx):
environment = _create_repository_execution_environment(rctx, python_interpreter)
whl_path = None
+ whl_label = None
if rctx.attr.file:
- whl_path = rctx.path(rctx.attr.file).realpath
+ whl_label = rctx.attr.file
+ whl_path = rctx.path(whl_label).realpath
if whl_path.basename.endswith("tar.gz"):
whl_path = None
- else:
- rctx.symlink(whl_path, whl_path.basename)
- whl_path = rctx.path(whl_path.basename)
+ whl_label = None
if whl_path == None:
result = rctx.execute(
@@ -756,8 +756,9 @@ def _whl_library_impl(rctx):
entry_points[entry_point_without_py] = entry_point_script_name
build_file_contents = generate_whl_library_build_bazel(
+ name = metadata["name"],
repo_prefix = rctx.attr.repo_prefix,
- whl_name = whl_path.basename,
+ whl_name = whl_label or whl_path.basename,
dependencies = metadata["deps"],
group_name = rctx.attr.group_name,
group_deps = rctx.attr.group_deps,
diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl
index 9b19fbe61..39694d4ac 100644
--- a/python/pip_install/private/generate_whl_library_build_bazel.bzl
+++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl
@@ -25,7 +25,6 @@ load(
"WHEEL_FILE_PUBLIC_LABEL",
)
load("//python/private:normalize_name.bzl", "normalize_name")
-load("//python/private:parse_whl_name.bzl", "parse_whl_name")
_COPY_FILE_TEMPLATE = """\
copy_file(
@@ -94,6 +93,7 @@ py_library(
def generate_whl_library_build_bazel(
*,
+ name,
repo_prefix,
whl_name,
dependencies,
@@ -210,7 +210,7 @@ def generate_whl_library_build_bazel(
data = repr(data),
impl_vis = repr(["@{}{}//:__pkg__".format(
repo_prefix,
- normalize_name(parse_whl_name(whl_name).distribution),
+ normalize_name(name),
)]),
),
] + additional_content,
From 42f916d6fc4ea4425665d83b15a9be33b029b92d Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Tue, 12 Dec 2023 10:50:49 +0900
Subject: [PATCH 14/81] implement patching in minihub
---
python/private/bzlmod/minihub.bzl | 86 ++++++++++++++++++++++++++++++-
1 file changed, 84 insertions(+), 2 deletions(-)
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index ae9090976..a4fbdf22e 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -91,7 +91,9 @@ Cons:
"""
+load("//python:versions.bzl", "WINDOWS_NAME")
load("//python/pip_install:pip_repository.bzl", _whl_library = "whl_library")
+load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
load(
"//python/private:labels.bzl",
"DATA_LABEL",
@@ -103,6 +105,8 @@ load(
)
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
+load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
+load("//python/private:patch_whl.bzl", "patch_whl")
load("//python/private:text_util.bzl", "render")
load(":label.bzl", _label = "label")
@@ -169,6 +173,8 @@ def whl_library(name, requirement, **kwargs):
metadata = metadata,
)
+ whl_patches = kwargs.pop("whl_patches", None)
+
for sha256 in sha256s:
whl_name = "{}_{}".format(name, sha256[:6])
@@ -179,6 +185,8 @@ def whl_library(name, requirement, **kwargs):
name = whl_name + "_whl",
metadata = metadata,
sha256 = sha256,
+ whl_patches = whl_patches,
+ # TODO @aignas 2023-12-12: do patching of the wheel here
)
_whl_library(
@@ -327,7 +335,27 @@ def _whl_archive_impl(rctx):
if not result.success:
fail(result)
- rctx.symlink(filename, "whl")
+ whl_path = rctx.path(filename)
+
+ if rctx.attr.whl_patches:
+ patches = {}
+ for patch_file, json_args in rctx.attr.whl_patches.items():
+ patch_dst = struct(**json.decode(json_args))
+ if whl_path.basename in patch_dst.whls:
+ patches[patch_file] = patch_dst.patch_strip
+
+
+ whl_path = patch_whl(
+ rctx,
+ # TODO @aignas 2023-12-12: do not use system Python
+ python_interpreter = _resolve_python_interpreter(rctx),
+ whl_path = whl_path,
+ patches = patches,
+ quiet = rctx.attr.quiet,
+ timeout = rctx.attr.timeout,
+ )
+
+ rctx.symlink(whl_path, "whl")
rctx.file(
"BUILD.bazel",
@@ -337,14 +365,68 @@ filegroup(
srcs=["{filename}"],
visibility=["//visibility:public"],
)
-""".format(filename = filename),
+""".format(filename = whl_path.basename),
)
whl_archive = repository_rule(
attrs = {
"metadata": attr.label(mandatory = True, allow_single_file = True),
+ "quiet": attr.bool(default=True),
"sha256": attr.string(mandatory = False),
+ "timeout": attr.int(default=60),
+ "whl_patches": attr.label_keyed_string_dict(
+ doc = """"a label-keyed-string dict that has
+ json.encode(struct([whl_file], patch_strip]) as values. This
+ is to maintain flexibility and correct bzlmod extension interface
+ until we have a better way to define whl_library and move whl
+ patching to a separate place. INTERNAL USE ONLY.""",
+ ),
+ "python_interpreter": attr.string(),
+ "python_interpreter_target": attr.label(),
},
doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
implementation = _whl_archive_impl,
)
+
+def _get_python_interpreter_attr(rctx):
+ """A helper function for getting the `python_interpreter` attribute or its default
+
+ Args:
+ rctx (repository_ctx): Handle to the rule repository context.
+
+ Returns:
+ str: The attribute value or its default
+ """
+ if rctx.attr.python_interpreter:
+ return rctx.attr.python_interpreter
+
+ if "win" in rctx.os.name:
+ return "python.exe"
+ else:
+ return "python3"
+
+def _resolve_python_interpreter(rctx):
+ """Helper function to find the python interpreter from the common attributes
+
+ Args:
+ rctx: Handle to the rule repository context.
+ Returns: Python interpreter path.
+ """
+ python_interpreter = _get_python_interpreter_attr(rctx)
+
+ if rctx.attr.python_interpreter_target != None:
+ python_interpreter = rctx.path(rctx.attr.python_interpreter_target)
+
+ if BZLMOD_ENABLED:
+ (os, _) = get_host_os_arch(rctx)
+
+ # On Windows, the symlink doesn't work because Windows attempts to find
+ # Python DLLs where the symlink is, not where the symlink points.
+ if os == WINDOWS_NAME:
+ python_interpreter = python_interpreter.realpath
+ elif "/" not in python_interpreter:
+ found_python_interpreter = rctx.which(python_interpreter)
+ if not found_python_interpreter:
+ fail("python interpreter `{}` not found in PATH".format(python_interpreter))
+ python_interpreter = found_python_interpreter
+ return python_interpreter
From 825d28d8cd2ce600372479e6224987a20f364d6b Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Thu, 14 Dec 2023 09:23:58 +0900
Subject: [PATCH 15/81] wip
---
python/private/bzlmod/minihub.bzl | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index a4fbdf22e..02d8afb75 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -39,10 +39,9 @@ is done in separate threads, one for each external repository.
Pros:
* Really fast, no need to re-download the wheels when changing the contents of
`whl_library`.
+* The cyclic dependency groups just work with a few tweaks.
Cons:
* The sha256 files in filenames makes things difficult to read/understand.
-* The cyclic dependency groups need extra work as the visibility between targets needs
- to be ironed out.
* The whl_annotations break, because users would need to specify weird repos in
their `use_repo` statements in the `MODULE.bazel` in order to make the
annotations useful. The need for forwarding the aliases based on the
@@ -76,12 +75,12 @@ Cons:
what we have today.
* whl_annotations for platform-specific wheels could be worked arround only in a subset
of cases. This is the analysis for each field:
- * additive_build_content => What to do?
- * copy_files => Apply to each platform-specific wheel and it will be OK and we will nede to generate aliases for them in the minihub.
- * copy_executables => Apply to each platform-specific wheel and it will be OK and we will need to generate aliases for them in the minihub.
- * data => Apply to each platform-specific wheel and it will be OK.
- * data_exclude_glob => Apply to each platform-specific wheel and it will be OK.
- * srcs_exclude_glob => Apply to each platform-specific wheel and it will be OK.
+ - [ ] additive_build_content => What to do?
+ - [.] copy_files => Apply to each platform-specific wheel and it will be OK and we will need to generate aliases for them in the minihub.
+ - [.] copy_executables => Apply to each platform-specific wheel and it will be OK and we will need to generate aliases for them in the minihub.
+ - [x] data => Apply to each platform-specific wheel and it will be OK.
+ - [x] data_exclude_glob => Apply to each platform-specific wheel and it will be OK.
+ - [x] srcs_exclude_glob => Apply to each platform-specific wheel and it will be OK.
## Notes on this approach
@@ -105,9 +104,9 @@ load(
)
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
-load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
load("//python/private:patch_whl.bzl", "patch_whl")
load("//python/private:text_util.bzl", "render")
+load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
load(":label.bzl", _label = "label")
_os_in_tag = {
@@ -344,7 +343,6 @@ def _whl_archive_impl(rctx):
if whl_path.basename in patch_dst.whls:
patches[patch_file] = patch_dst.patch_strip
-
whl_path = patch_whl(
rctx,
# TODO @aignas 2023-12-12: do not use system Python
@@ -371,9 +369,11 @@ filegroup(
whl_archive = repository_rule(
attrs = {
"metadata": attr.label(mandatory = True, allow_single_file = True),
- "quiet": attr.bool(default=True),
+ "python_interpreter": attr.string(),
+ "python_interpreter_target": attr.label(),
+ "quiet": attr.bool(default = True),
"sha256": attr.string(mandatory = False),
- "timeout": attr.int(default=60),
+ "timeout": attr.int(default = 60),
"whl_patches": attr.label_keyed_string_dict(
doc = """"a label-keyed-string dict that has
json.encode(struct([whl_file], patch_strip]) as values. This
@@ -381,8 +381,6 @@ whl_archive = repository_rule(
until we have a better way to define whl_library and move whl
patching to a separate place. INTERNAL USE ONLY.""",
),
- "python_interpreter": attr.string(),
- "python_interpreter_target": attr.label(),
},
doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
implementation = _whl_archive_impl,
From 0401d775d1a9124388b2594a1659fcd48a91da69 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Thu, 14 Dec 2023 09:31:08 +0900
Subject: [PATCH 16/81] remove TODOs
---
python/private/bzlmod/minihub.bzl | 2 --
1 file changed, 2 deletions(-)
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index 02d8afb75..bec881a7c 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -185,7 +185,6 @@ def whl_library(name, requirement, **kwargs):
metadata = metadata,
sha256 = sha256,
whl_patches = whl_patches,
- # TODO @aignas 2023-12-12: do patching of the wheel here
)
_whl_library(
@@ -345,7 +344,6 @@ def _whl_archive_impl(rctx):
whl_path = patch_whl(
rctx,
- # TODO @aignas 2023-12-12: do not use system Python
python_interpreter = _resolve_python_interpreter(rctx),
whl_path = whl_path,
patches = patches,
From f65d6adaa76710ba630c84723db1db6d14df7409 Mon Sep 17 00:00:00 2001
From: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
Date: Fri, 15 Dec 2023 09:51:53 +0900
Subject: [PATCH 17/81] wip: move the downloading of all of the wheels outside
whl_library macro
---
examples/bzlmod/whl_mods/BUILD.bazel | 2 +
python/private/bzlmod/minihub.bzl | 142 ++++++++++++++----------
python/private/bzlmod/pip.bzl | 17 +--
python/private/bzlmod/pypi_metadata.bzl | 119 ++++++++++++++++++--
4 files changed, 202 insertions(+), 78 deletions(-)
diff --git a/examples/bzlmod/whl_mods/BUILD.bazel b/examples/bzlmod/whl_mods/BUILD.bazel
index 6ca07dd2d..eeb4a4857 100644
--- a/examples/bzlmod/whl_mods/BUILD.bazel
+++ b/examples/bzlmod/whl_mods/BUILD.bazel
@@ -13,6 +13,8 @@ py_test(
"WHEEL_PKG_DIR": "pip_39_wheel",
},
main = "pip_whl_mods_test.py",
+ data = [
+ ],
deps = [
"@pip//requests:pkg",
"@pip//wheel:pkg",
diff --git a/python/private/bzlmod/minihub.bzl b/python/private/bzlmod/minihub.bzl
index bec881a7c..b7211e140 100644
--- a/python/private/bzlmod/minihub.bzl
+++ b/python/private/bzlmod/minihub.bzl
@@ -22,7 +22,7 @@ There is a single Pip hub repository, which creates the following repos:
to download things. Args:
* distribution - The name of the distribution.
* version - The version of the package.
-* `whl_archive` that downloads a particular wheel for a package, it accepts
+* `pypi_archive` that downloads a particular wheel for a package, it accepts
the following args:
* sha256 - The sha256 to download.
* url - The url to use. Optional.
@@ -152,7 +152,7 @@ def _parse_platform_tag(platform_tag):
cpu = _parse_cpu_from_tag(platform_tag)
return os, cpu
-def whl_library(name, requirement, **kwargs):
+def whl_library(name, *, requirement, files, **kwargs):
"""Generate a number of third party repos for a particular wheel.
"""
sha256s = [sha.strip() for sha in requirement.split("--hash=sha256:")[1:]]
@@ -161,39 +161,26 @@ def whl_library(name, requirement, **kwargs):
distribution, _, _ = distribution.partition("[")
distribution = normalize_name(distribution)
- metadata = _label("@{}_metadata//:files.json".format(distribution))
-
- whl_minihub(
- name = name,
- repo = kwargs.get("repo"),
- group_name = kwargs.get("group_name"),
- distribution = distribution,
- sha256s = sha256s,
- metadata = metadata,
- )
-
- whl_patches = kwargs.pop("whl_patches", None)
-
+ libs = {}
for sha256 in sha256s:
whl_name = "{}_{}".format(name, sha256[:6])
-
- # We would use http_file, but we are passing the URL to use via a file,
- # if the url is known (in case of using pdm lock), we could use an
- # http_file.
- whl_archive(
- name = whl_name + "_whl",
- metadata = metadata,
- sha256 = sha256,
- whl_patches = whl_patches,
- )
-
+ libs[sha256] = whl_name
_whl_library(
name = whl_name,
- file = _label("@{}_whl//:whl".format(whl_name)),
+ file = files.files[sha256],
requirement = requirement,
**kwargs
)
+ whl_minihub(
+ name = name,
+ repo = kwargs.get("repo"),
+ group_name = kwargs.get("group_name"),
+ libs = libs,
+ metadata = files.metadata,
+ annotation = kwargs.get("annotation"),
+ )
+
def _whl_minihub_impl(rctx):
metadata = rctx.path(rctx.attr.metadata)
files = json.decode(rctx.read(metadata))
@@ -201,19 +188,23 @@ def _whl_minihub_impl(rctx):
abi = "cp" + rctx.attr.repo.rpartition("_")[2]
build_contents = []
- sha256s = {sha: True for sha in rctx.attr.sha256s}
+ libs = rctx.attr.libs
actual = None
select = {}
- for file in files["files"]:
- sha256 = file["sha256"]
- if sha256 not in sha256s:
- continue
+ for sha256, repo_name in rctx.attr.libs.items():
+
+ url = None
+ for file in files["files"]:
+ if file["sha256"] == sha256:
+ url = file["url"]
+ break
+
+ if not url:
+ fail("could not find")
- tmpl = "@{name}_{distribution}_{sha256}//:{{target}}".format(
- name = rctx.attr.repo,
- distribution = rctx.attr.distribution,
- sha256 = sha256[:6],
+ tmpl = "@{repo_name}//:{{target}}".format(
+ repo_name = libs[sha256],
)
_, _, filename = file["url"].strip().rpartition("/")
@@ -271,18 +262,43 @@ config_setting(
whl_impl_label = WHEEL_FILE_IMPL_LABEL
impl_vis = "//visibility:private"
+ public_visibility = "//visibility:public"
+
+ alias_targets = {
+ DATA_LABEL: public_visibility,
+ DIST_INFO_LABEL: public_visibility,
+ PY_LIBRARY_IMPL_LABEL: impl_vis,
+ WHEEL_FILE_IMPL_LABEL: impl_vis,
+ }
+
+ if rctx.attr.annotation:
+ annotation = struct(**json.decode(rctx.read(rctx.attr.annotation)))
+
+ for dest in annotation.copy_files.values():
+ alias_targets["{}.copy".format(dest)] = public_visibility
+
+ for dest in annotation.copy_executables.values():
+ alias_targets["{}.copy".format(dest)] = public_visibility
+
+ # FIXME @aignas 2023-12-14: is this something that we want? It looks a
+ # little bit hacky, as we don't parse the visibility of the extra
+ # targets.
+ if annotation.additive_build_content:
+ targets_defined_in_additional_info = [
+ line.partition("=")[2].strip().strip("\"',")
+ for line in annotation.additive_build_content.split("\n")
+ if line.strip().startswith("name")
+ ]
+ for dest in targets_defined_in_additional_info:
+ alias_targets[dest] = public_visibility
+
build_contents += [
render.alias(
name = target,
actual = actual.format(target = target) if actual else render.select({k: v.format(target = target) for k, v in select.items()}),
visibility = [visibility],
)
- for target, visibility in {
- DATA_LABEL: "//visibility:public",
- DIST_INFO_LABEL: "//visibility:public",
- PY_LIBRARY_IMPL_LABEL: impl_vis,
- WHEEL_FILE_IMPL_LABEL: impl_vis,
- }.items()
+ for target, visibility in alias_targets.items()
]
build_contents += [
@@ -301,11 +317,17 @@ config_setting(
whl_minihub = repository_rule(
attrs = {
- "distribution": attr.string(mandatory = True),
+ "annotation": attr.label(
+ doc = (
+ "Optional json encoded file containing annotation to apply to the extracted wheel. " +
+ "See `package_annotation`"
+ ),
+ allow_files = True,
+ ),
"group_name": attr.string(),
+ "libs": attr.string_dict(mandatory = True),
"metadata": attr.label(mandatory = True, allow_single_file = True),
"repo": attr.string(mandatory = True),
- "sha256s": attr.string_list(mandatory = True),
},
doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
implementation = _whl_minihub_impl,
@@ -315,17 +337,16 @@ def _whl_archive_impl(rctx):
prefix, _, _ = rctx.attr.name.rpartition("_")
prefix, _, _ = prefix.rpartition("_")
- metadata = rctx.path(rctx.attr.metadata)
- files = json.decode(rctx.read(metadata))
+ metadata = struct(**json.decode(rctx.read(rctx.path(rctx.attr.metadata))))
sha256 = rctx.attr.sha256
url = None
- for file in files["files"]:
+ for file in metadata.files:
if file["sha256"] == sha256:
url = file["url"]
break
if url == None:
- fail("Could not find a file with sha256 '{}' within: {}".format(sha256, files))
+ fail("Could not find a file with sha256 '{}' within: {}".format(sha256, metadata))
_, _, filename = url.rpartition("/")
filename = filename.strip()
@@ -335,13 +356,16 @@ def _whl_archive_impl(rctx):
whl_path = rctx.path(filename)
- if rctx.attr.whl_patches:
+ if rctx.attr.patches:
patches = {}
- for patch_file, json_args in rctx.attr.whl_patches.items():
+ for patch_file, json_args in rctx.attr.patches.items():
patch_dst = struct(**json.decode(json_args))
if whl_path.basename in patch_dst.whls:
patches[patch_file] = patch_dst.patch_strip
+ # TODO @aignas 2023-12-14: re-parse the metadata to ensure that we have a
+ # non-stale version of it
+ # Something like: whl_path, metadata = patch_whl(
whl_path = patch_whl(
rctx,
python_interpreter = _resolve_python_interpreter(rctx),
@@ -351,34 +375,34 @@ def _whl_archive_impl(rctx):
timeout = rctx.attr.timeout,
)
- rctx.symlink(whl_path, "whl")
+ rctx.symlink(whl_path, "file")
rctx.file(
"BUILD.bazel",
"""\
filegroup(
- name="whl",
+ name="file",
srcs=["{filename}"],
visibility=["//visibility:public"],
)
""".format(filename = whl_path.basename),
)
-whl_archive = repository_rule(
+pypi_archive = repository_rule(
attrs = {
"metadata": attr.label(mandatory = True, allow_single_file = True),
- "python_interpreter": attr.string(),
- "python_interpreter_target": attr.label(),
- "quiet": attr.bool(default = True),
- "sha256": attr.string(mandatory = False),
- "timeout": attr.int(default = 60),
- "whl_patches": attr.label_keyed_string_dict(
+ "patches": attr.label_keyed_string_dict(
doc = """"a label-keyed-string dict that has
json.encode(struct([whl_file], patch_strip]) as values. This
is to maintain flexibility and correct bzlmod extension interface
until we have a better way to define whl_library and move whl
patching to a separate place. INTERNAL USE ONLY.""",
),
+ "python_interpreter": attr.string(),
+ "python_interpreter_target": attr.label(),
+ "quiet": attr.bool(default = True),
+ "sha256": attr.string(mandatory = False),
+ "timeout": attr.int(default = 60),
},
doc = """A rule for bzlmod mulitple pip repository creation. PRIVATE USE ONLY.""",
implementation = _whl_archive_impl,
diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl
index aa933b6ff..62cb0658a 100644
--- a/python/private/bzlmod/pip.bzl
+++ b/python/private/bzlmod/pip.bzl
@@ -30,7 +30,7 @@ load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:version_label.bzl", "version_label")
load(":minihub.bzl", "whl_library")
load(":pip_repository.bzl", "pip_repository")
-load(":pypi_metadata.bzl", "whl_lock")
+load(":pypi_metadata.bzl", "whl_files_from_requirements")
def _whl_mods_impl(mctx):
"""Implementation of the pip.whl_mods tag class.
@@ -81,7 +81,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
whl_mods = whl_mods,
)
-def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
+def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, files):
python_interpreter_target = pip_attr.python_interpreter_target
# if we do not have the python_interpreter set in the attributes
@@ -104,6 +104,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
hub_name,
version_label(pip_attr.python_version),
)
+
+ # how do we get rid of this?
+ # maybe we should resolve the extra_pip_args and the requirements lines per
+ # platform and do a more clever init.
requrements_lock = locked_requirements_label(module_ctx, pip_attr)
# Parse the requirements file directly in starlark to get the information
@@ -172,6 +176,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides):
environment = pip_attr.environment,
group_name = group_name,
group_deps = group_deps,
+ files = files[whl_name],
)
if whl_name not in whl_map[hub_name]:
@@ -295,14 +300,12 @@ def _pip_impl(module_ctx):
requirements = parse_result.requirements
all_requirements.extend([line for _, line in requirements])
- whl_lock(
+ files = whl_files_from_requirements(
+ name = "pypi_whl",
requirements = all_requirements,
#indexes = kwargs.get("indexes"),
)
- #lock_path = module_ctx.path(label("@whl_lock//:lock.json"))
- #fail(lock_path)
-
# Used to track all the different pip hubs and the spoke pip Python
# versions.
pip_hub_map = {}
@@ -347,7 +350,7 @@ def _pip_impl(module_ctx):
else:
pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version)
- _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides)
+ _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides, files)
for hub_name, whl_map in hub_whl_map.items():
pip_repository(
diff --git a/python/private/bzlmod/pypi_metadata.bzl b/python/private/bzlmod/pypi_metadata.bzl
index 79a48d9fd..ebe4ad503 100644
--- a/python/private/bzlmod/pypi_metadata.bzl
+++ b/python/private/bzlmod/pypi_metadata.bzl
@@ -15,9 +15,12 @@
"""PyPI metadata hub and spoke repos"""
load("//python/private:normalize_name.bzl", "normalize_name")
+load(":label.bzl", _label = "label")
+load(":minihub.bzl", "pypi_archive")
-def whl_lock(requirements, **kwargs):
+def whl_files_from_requirements(*, name, requirements, **kwargs):
indexes = kwargs.get("indexes", ["https://pypi.org/simple"])
+ whl_overrides = kwargs.get("whl_overrides", {})
sha_by_pkg = {}
for requirement in requirements:
@@ -32,28 +35,69 @@ def whl_lock(requirements, **kwargs):
for sha in sha256s:
sha_by_pkg[distribution][sha] = True
+ ret = {}
for distribution, shas in sha_by_pkg.items():
+ metadata = "{}_metadata__{}".format(name, distribution)
pypi_distribution_metadata(
- name = "{}_metadata".format(distribution),
+ name = metadata,
distribution = distribution,
sha256s = shas,
indexes = indexes,
)
-def _pypi_distribution_metadata_impl(rctx):
+ metadata = _label("@{}//:metadata.json".format(metadata))
+
+ files = {}
+ for sha256 in shas:
+ archive_name = "{}_{}_{}".format(name, distribution, sha256[:6])
+ files[sha256] = _label("@{}//:file".format(archive_name))
+
+ # We would use http_file, but we are passing the URL to use via a file.
+ # If the URL is known up front (e.g. when using a pdm lock), we could use
+ # an http_file instead.
+ pypi_archive(
+ name = archive_name,
+ metadata = metadata,
+ sha256 = sha256,
+ patches = {
+ p: json.encode(args)
+ for p, args in whl_overrides.get(distribution, {}).items()
+ },
+ # FIXME @aignas 2023-12-15: add usage of the DEFAULT_PYTHON_VERSION
+ # to get the hermetic interpreter
+ )
+
+ ret[distribution] = struct(
+ metadata = metadata,
+ files = files,
+ )
+
+ # return a {
+ # : struct(
+ # metadata =