|
1 | 1 | import logging |
| 2 | +import os |
2 | 3 | import pathlib |
| 4 | +import re |
| 5 | +import tarfile |
| 6 | +import tempfile |
| 7 | +import typing |
3 | 8 | from urllib.parse import urlparse |
4 | 9 |
|
5 | 10 | from packaging.requirements import Requirement |
| 11 | +from packaging.utils import canonicalize_name |
| 12 | +from packaging.version import Version |
6 | 13 |
|
7 | | -from fromager import context, external_commands |
| 14 | +from . import context, external_commands, tarballs |
8 | 15 |
|
9 | 16 | logger = logging.getLogger(__name__) |
10 | 17 |
|
@@ -61,3 +68,256 @@ def git_clone( |
61 | 68 | ) |
62 | 69 |
|
63 | 70 | return output_dir |
| 71 | + |
| 72 | + |
class BeforeSubmoduleCallback(typing.Protocol):
    """Call signature of the "before submodule update" hook

    A conforming callable accepts keyword-only arguments *clonedir*, *name*
    and *remote*, and returns nothing.
    """

    def __call__(
        self,
        *,
        clonedir: pathlib.Path,
        name: str,
        remote: str,
    ) -> None: ...
| 78 | + |
| 79 | + |
def git_clone_and_tarball(
    *,
    destdir: pathlib.Path,
    prefix: tuple[Requirement, Version] | str,
    repo_url: str,
    tag: str | None = None,
    ref: str | None = None,
    before_submodule_update: BeforeSubmoduleCallback | None = None,
    git_archival_tag_match: str | None = None,
) -> pathlib.Path:
    """Clone a git repository and generate a tar ball

    This function creates a tar ball from a remote URL, with all submodules
    (non-recursive), and includes a ``.git_archival.txt`` for setuptools-scm.

    :param destdir: directory where the tar ball is stored
    :param prefix: prefix of the tar ball and first level directory, either
        a string or a ``(Requirement, Version)`` tuple that is turned into
        ``{canonical-name}-{version}``
    :param repo_url: git clone url
    :param tag: tag name to clone
    :param ref: git ref to clone (mutually exclusive with *tag*)
    :param before_submodule_update: callback that runs before
        ``git submodule update``. The callback is executed for each submodule.
    :param git_archival_tag_match: git describe tag pattern for ``.git_archival.txt``
    :returns: path of the generated ``{prefix}.tar.gz`` inside *destdir*
    :raises TypeError: if *prefix* is a tuple that does not hold a
        ``Requirement`` followed by a ``Version``
    :raises ValueError: if *tag* / *ref* are not given as exactly one of the
        two, or if the resulting prefix contains a path separator

    This example code creates a ``xformers-0.0.31.post1.tar.gz`` tar ball:

    .. code-block::

        def cb(*, clonedir: pathlib.Path, name: str, remote: str) -> None:
            subprocess.check_call(
                ["git", "config", f"submodule.{name}.url", mirror(remote)],
                cwd=str(clonedir)
            )

        req = Requirement("xformers")
        tag = "v0.0.31.post1"
        version = Version(tag)
        repo_url = "https://github.com/facebookresearch/xformers.git"
        destdir = pathlib.Path("destdir").absolute()
        tarball = git_clone_and_tarball(
            prefix=(req, version),
            destdir=destdir,
            repo_url=repo_url,
            tag=tag,
            before_submodule_update=cb,
        )
    """
    if isinstance(prefix, tuple):
        req, version = prefix[0], prefix[1]
        # Explicit check instead of ``assert``: asserts vanish under
        # ``python -O`` and a bad tuple would fail later with an obscure error.
        if not isinstance(req, Requirement) or not isinstance(version, Version):
            raise TypeError(
                "prefix tuple must be (Requirement, Version), got "
                f"({type(req).__name__}, {type(version).__name__})"
            )
        canon_name = canonicalize_name(req.name)
        prefix = f"{canon_name}-{version}"

    # The clone only lives for the duration of this call; the tar ball is
    # written to *destdir* before the temporary directory is removed.
    with tempfile.TemporaryDirectory() as tmpdir:
        clonedir = pathlib.Path(tmpdir).absolute()
        _git_clone(
            clonedir=clonedir,
            repo_url=repo_url,
            tag=tag,
            ref=ref,
        )
        # Submodules are initialized but not fetched yet, so the callback
        # can still adjust their configuration (e.g. point to a mirror).
        submodules = _git_submodule_list(clonedir=clonedir)
        if before_submodule_update is not None:
            for name, remote in submodules.items():
                before_submodule_update(clonedir=clonedir, name=name, remote=remote)
        _get_submodule_update(clonedir=clonedir)
        _make_git_archival_txt(
            clonedir=clonedir,
            tag_match=git_archival_tag_match,
        )
        tarball = _create_tarball(
            clonedir=clonedir,
            destdir=destdir,
            prefix=prefix,
        )

    return tarball
| 159 | + |
| 160 | + |
| 161 | +def _git_clone( |
| 162 | + *, |
| 163 | + clonedir: pathlib.Path, |
| 164 | + repo_url: str, |
| 165 | + tag: str | None, |
| 166 | + ref: str | None, |
| 167 | +) -> None: |
| 168 | + """Clone a git repository into *clonedir* |
| 169 | +
|
| 170 | + Initializes submodules |
| 171 | + """ |
| 172 | + if not bool(tag) ^ bool(ref): |
| 173 | + raise ValueError("tag and ref are mutually exclusive") |
| 174 | + |
| 175 | + # Create a clean URL without any credentials for logging |
| 176 | + parsed_url = urlparse(repo_url) |
| 177 | + clean_url = parsed_url._replace(netloc=parsed_url.hostname or "").geturl() |
| 178 | + logger.info(f"cloning {clean_url}, tag {tag}, ref {ref}, into {clonedir}") |
| 179 | + |
| 180 | + cmd: list[str] = ["git", "clone"] |
| 181 | + if tag is not None: |
| 182 | + # --branch works with branches and tags, but not with commits |
| 183 | + cmd.extend(["--branch", tag, "--depth", "1"]) |
| 184 | + cmd.extend([repo_url, str(clonedir)]) |
| 185 | + external_commands.run(cmd, network_isolation=False) |
| 186 | + |
| 187 | + # --branch only works with names, so we have to checkout the reference we |
| 188 | + # actually want if it is not a name |
| 189 | + if ref is not None: |
| 190 | + external_commands.run( |
| 191 | + ["git", "checkout", "--force", ref], |
| 192 | + cwd=str(clonedir), |
| 193 | + network_isolation=False, |
| 194 | + ) |
| 195 | + |
| 196 | + # initialize submodule but do not fetch them, yet, to allow customization. |
| 197 | + external_commands.run( |
| 198 | + ["git", "submodule", "init"], |
| 199 | + cwd=str(clonedir), |
| 200 | + network_isolation=False, |
| 201 | + ) |
| 202 | + |
| 203 | + |
# Matches one ``git config --list`` output line that holds a submodule URL,
# e.g. ``submodule.third_party/foo.url=https://example.com/foo.git``.
# Group 1 is the submodule name, group 2 the remote URL.
_SUBMODULE_RE = re.compile(r"^submodule\.(.*)\.url=(.*)$")


def _git_submodule_list(*, clonedir: pathlib.Path) -> dict[str, str]:
    """Get submodule mapping of name -> remote

    Reads the ``submodule.<name>.url`` entries from the repository-local git
    configuration of *clonedir*. Submodule must be initialized, otherwise
    there are no such entries and the mapping is empty.

    :param clonedir: directory of the git checkout
    :returns: dict of submodule name to remote URL
    """
    # ``--list`` instead of the ``list`` subcommand: the subcommand only
    # exists in newer git releases, the option works with old and new ones.
    out = external_commands.run(
        ["git", "config", "--list", "--local"],
        cwd=str(clonedir),
        network_isolation=False,
    )
    submodules: dict[str, str] = {}
    for line in out.splitlines():
        if mo := _SUBMODULE_RE.match(line):
            name, remote = mo.groups()
            submodules[name] = remote
    logger.debug(f"found submodules: {submodules}")
    return submodules
| 224 | + |
| 225 | + |
def _get_submodule_update(*, clonedir: pathlib.Path) -> None:
    """Update and fetch submodules

    Runs ``git submodule update --force --depth 1`` inside *clonedir*. The
    command is not invoked with ``--recursive``.

    :param clonedir: directory of the git checkout
    """
    external_commands.run(
        ["git", "submodule", "update", "--force", "--depth", "1"],
        cwd=str(clonedir),
        network_isolation=False,
    )
| 233 | + |
| 234 | + |
def _make_git_archival_txt(
    clonedir: pathlib.Path,
    *,
    tag_match: str | None = None,
) -> str:
    """Write a ``.git_archival.txt`` for setuptools-scm into *clonedir*

    The content is produced by ``git log -1`` with a format that emits the
    commit hash, the commit date and a ``git describe`` style name.

    https://setuptools-scm.readthedocs.io/en/latest/usage/#git-archives

    :param clonedir: directory of the git checkout
    :param tag_match: glob for tags considered by describe; a falsy value
        selects ``*[0-9]*``
    :returns: the text that was written to the file
    """
    describe_match = tag_match or "*[0-9]*"

    # An existing .git_archival.txt template in the checkout is ignored and
    # overwritten.
    # TODO: Figure out how to use an existing file and replace its template variables.
    pretty_format = "\n".join(
        (
            "node: %H",  # commit hash
            "node-date: %cI",  # commit date
            # tag + commits since tag
            f"describe-name: %(describe:tags=true,match={describe_match})",
        )
    )
    content = external_commands.run(
        ["git", "log", "--pretty=tformat:" + pretty_format, "-1"],
        cwd=str(clonedir),
        network_isolation=False,
    )

    target = clonedir / ".git_archival.txt"
    target.write_text(content)
    logger.debug(f"Generated {target} with content: \n{content}")
    return content
| 268 | + |
| 269 | + |
def _create_tarball(
    *,
    clonedir: pathlib.Path,
    destdir: pathlib.Path,
    prefix: str,
) -> pathlib.Path:
    """Pack the checkout in *clonedir* into ``{prefix}.tar.gz`` in *destdir*

    :param clonedir: directory of the git checkout to archive
    :param destdir: directory that receives the tar ball
    :param prefix: tar ball base name, also passed on as the archive prefix
    :returns: path of the tar ball
    :raises ValueError: if *prefix* contains ``os.sep``
    """
    # reject prefixes that contain the path separator
    if prefix.find(os.sep) >= 0:
        raise ValueError(f"{prefix=} cannot contain {os.sep}")

    output = destdir.joinpath(f"{prefix}.tar.gz")
    # mode "x" refuses to overwrite, so drop a left-over file first
    if output.is_file():
        logger.debug(f"removing stale tar ball {output}")
        output.unlink()

    archive_root = pathlib.PurePath(prefix)
    with tarfile.open(output, "x:gz", format=tarfile.PAX_FORMAT) as archive:
        tarballs.tar_reproducible_with_prefix(
            tar=archive,
            basedir=clonedir,
            prefix=archive_root,
            exclude_vcs=True,
        )
    return output
| 294 | + |
| 295 | + |
def test(sample: str = "xformers") -> None:
    """Manual smoke test: clone a sample project and build its tar ball

    Calls :func:`git_clone_and_tarball`, so it needs a ``git`` executable and
    access to the remote repository. The tar ball is written to the current
    working directory and its path is printed.

    :param sample: name of the sample project, ``"xformers"`` (default) or
        ``"fromager"``
    :raises ValueError: if *sample* is not a known sample name
    """
    logging.basicConfig(level=logging.DEBUG)

    # sample name -> (tag, clone url)
    samples: dict[str, tuple[str, str]] = {
        "xformers": (
            "v0.0.31.post1",
            "https://github.com/facebookresearch/xformers.git",
        ),
        "fromager": (
            "0.54.0",
            "https://github.com/python-wheel-build/fromager.git",
        ),
    }
    if sample not in samples:
        raise ValueError(f"unknown sample {sample!r}, expected one of {sorted(samples)}")
    tag, repo_url = samples[sample]

    def cb(*, clonedir: pathlib.Path, name: str, remote: str) -> None:
        # only show what would be updated, do not modify the submodule
        print(name, remote)

    tarball = git_clone_and_tarball(
        destdir=pathlib.Path(".").absolute(),
        prefix=(Requirement(sample), Version(tag)),
        repo_url=repo_url,
        tag=tag,
        before_submodule_update=cb,
    )
    print(tarball)


if __name__ == "__main__":
    test()
0 commit comments