def add_git_source(
    self,
    url: str,
    commit: Optional[str] = None,
    destination: Optional[Path] = None,
    tag: Optional[str] = None,
) -> None:
    """Add a ``git`` source identified by a commit, a tag, or both.

    Args:
        url: Repository URL, stored under the ``url`` key.
        commit: Value stored under the ``commit`` key; may be omitted when
            ``tag`` is given.
        destination: Forwarded unchanged to
            ``_add_source_with_destination`` (with ``is_dir=True``).
        tag: Value stored under the ``tag`` key; may be omitted when
            ``commit`` is given.

    Raises:
        ValueError: If neither ``commit`` nor ``tag`` is provided.
    """
    # Validate with a real exception: an ``assert`` is stripped under
    # ``python -O`` and would let a source without any ref through.
    if not commit and not tag:
        raise ValueError('add_git_source() requires a commit or a tag')

    source = {'type': 'git', 'url': url}
    if commit:
        source['commit'] = commit
    if tag:
        source['tag'] = tag
    self._add_source_with_destination(source, destination, is_dir=True)
# Methods of ``YarnLockfileProvider`` that handle lockfiles carrying a
# ``__metadata`` section (any lockfile whose metadata version is > 1).


def __init__(self) -> None:
    """Start out assuming a version 1 lockfile with no cache key."""
    # Both attributes are overwritten by parse_metadata_section().
    self.version = 1
    self.cacheKey = ''


def get_filed_value(self, line: str) -> str:
    """Return the unquoted value part of a ``key: value`` line.

    The line is split at its first space and the remainder is passed
    through ``self.unquote``.

    Raises:
        IndexError: If ``line`` contains no space.
    """
    return self.unquote(line.split(' ', 1)[1])


def parse_metadata_section(self, section: List[str]) -> None:
    """Read ``version`` and ``cacheKey`` from the ``__metadata`` section.

    Args:
        section: Lines of the section; the first line (the header) is
            ignored.

    Raises:
        AssertionError: If, after parsing, the version is not greater
            than 1 or the cache key is empty.
    """
    for raw_line in section[1:]:
        line = raw_line.strip()
        if line.startswith('version'):
            self.version = int(self.get_filed_value(line), base=10)
        elif line.startswith('cacheKey'):
            self.cacheKey = self.get_filed_value(line)
    assert self.version > 1
    assert self.cacheKey


def parse_package_section(
    self, lockfile: Path, section: List[str]
) -> Optional[Package]:
    """Parse one package section of a lockfile with ``__metadata``.

    Args:
        lockfile: Path of the lockfile, recorded on the returned package.
        section: Lines of the section; the first one is the
            ``"name@range, ...":`` header.

    Returns:
        The parsed package, or ``None`` for entries that need no source:
        ``@patch:`` entries and entries whose ``linkType`` is ``soft``.

    Raises:
        AssertionError: If the section is empty, the header does not end
            with ``:``, a body line is not indented, or ``resolution`` /
            ``version`` is missing.
    """
    assert section
    name_line = section[0]
    assert name_line.endswith(':'), name_line
    name_line = name_line[:-1]

    # Only the first descriptor of a "a@x, a@y" header is used for the name.
    name = self.unquote(name_line).split(',', 1)[0]
    name, _ = name.rsplit('@', 1)

    # ignore patch, it will be generated by yarn
    if '@patch:' in name:
        return None

    version: Optional[str] = None
    resolved: Optional[str] = None
    integrity: Optional[Integrity] = None

    section_indent = 0

    for raw_line in section[1:]:
        line = raw_line.strip()
        if not line:
            # A whitespace-only line carries no field; scanning it for its
            # first non-space character would run off the end of the string.
            continue

        indent = len(raw_line) - len(raw_line.lstrip())
        assert indent, raw_line
        if not section_indent:
            # The first body line defines the indent of top-level fields.
            section_indent = indent
        elif indent > section_indent:
            # Inside some nested section.
            continue

        if line.startswith('version'):
            version = self.get_filed_value(line)
        elif line.startswith('checksum'):
            integrity = Integrity(
                algorithm='sha512', digest=self.get_filed_value(line)
            )
        elif line.startswith('linkType'):
            if self.get_filed_value(line) == 'soft':
                return None
        elif line.startswith('resolution'):
            # The "resolution#" marker lets the module provider tell these
            # entries apart from plain tarball URLs.
            resolved = f'resolution#{self.get_filed_value(line)}'
        # Every other field (e.g. ``conditions``) is currently ignored.

    assert resolved and version, section

    if not integrity:
        # No checksum in the lockfile: fall back to the cache key as digest.
        integrity = Integrity(algorithm='sha512', digest=self.cacheKey)

    source: PackageSource

    if self.is_git_version(resolved):
        source = self.parse_git_source(version=resolved)
    else:
        source = ResolvedSource(resolved=resolved, integrity=integrity)

    return Package(name=name, version=version, source=source, lockfile=lockfile)
# Members of ``YarnModuleProvider`` used to turn ``resolution#...`` lockfile
# entries into downloadable sources.


class Locator(NamedTuple):
    """A lockfile resolution split into scope, name and reference."""

    scope: str
    name: str
    reference: str


_GIT_PROTOCOLS = ['commit', 'head', 'tag', 'semver']


class GitRepoUrlParts(NamedTuple):
    """A git URL split into the repository and the requested ref."""

    repo: str
    protocol: Optional[str]
    request: str
    extra: Optional[Dict[str, str]]


# From https://github.com/yarnpkg/berry/blob/%40yarnpkg/shell%2F3.1.0/packages/yarnpkg-core/sources/structUtils.ts#L412
_RESOLUTION_RE = re.compile(r'^(?:@([^/]+?)\/)?([^/]+?)(?:@(.+))$')
# From https://github.com/yarnpkg/berry/blob/%40yarnpkg/shell%2F3.1.0/packages/yarnpkg-core/sources/structUtils.ts#L462
_REFERENCE_RE = re.compile(
    r'^([^#:]*:)?((?:(?!::)[^#])*)(?:#((?:(?!::).)*))?(?:::(.*))?$'
)
# From https://github.com/yarnpkg/yarn/blob/v1.22.4/src/fetchers/tarball-fetcher.js
_PACKAGE_TARBALL_URL_RE = re.compile(
    r'(?:(@[^/]+)(?:/|%2f))?[^/]+/(?:-|_attachments)/(?:@[^/]+/)?([^/]+)$'
)


def get_resolution_from_resolved(self, resolved: str) -> str:
    """Return ``resolved`` without its leading ``resolution#`` marker.

    Raises:
        AssertionError: If ``resolved`` does not start with the marker.
    """
    prefix = 'resolution#'
    assert resolved.startswith(prefix)
    return resolved[len(prefix):]


def get_locator_url(self, locator: Locator) -> str:
    """Return the URL path of ``locator``: ``/name`` or ``/@scope%2fname``."""
    if locator.scope:
        return f'/@{locator.scope}%2f{locator.name}'
    return f'/{locator.name}'


def get_locator_from_resolution(self, resolution: str) -> Locator:
    """Split a resolution such as ``@scope/name@reference`` into a Locator.

    Groups that did not take part in the match become empty strings.

    Raises:
        AssertionError: If ``resolution`` does not match ``_RESOLUTION_RE``.
    """
    match = self._RESOLUTION_RE.match(resolution)
    assert match
    scope, name, ref = (group or '' for group in match.groups())
    return self.Locator(scope=scope, name=name, reference=ref)


def name_base64_locator(self, locator: Locator, resolution: str) -> str:
    """Return ``<name>-<base64 of resolution>``.

    NOTE(review): the standard base64 alphabet includes ``/`` and this value
    is used as part of a file name by the caller -- confirm that no
    resolution can encode to a string containing ``/``.
    """
    encoded = base64.b64encode(resolution.encode()).decode()
    return f'{locator.name}-{encoded}'


# From https://github.com/yarnpkg/berry/blob/%40yarnpkg/shell%2F3.1.0/packages/plugin-git/sources/gitUtils.ts#L56
def parse_git_subsequent(self, url: str) -> GitRepoUrlParts:
    """Split a git URL into the repository and the ref named by its fragment.

    Three fragment forms are understood:

    * none or empty -- ``repo`` or ``repo#``: the ``head`` protocol with
      request ``HEAD``;
    * query style -- ``repo#commit=abc&workspace=pkg``: keys listed in
      ``_GIT_PROTOCOLS`` select protocol and request, every other key is
      collected into ``extra``;
    * plain style -- ``repo#tag:v1`` or ``repo#abc``: ``protocol:request``,
      or a bare request with ``protocol`` set to ``None``.

    Returns:
        The parsed parts. ``extra`` is a dict for query-style fragments and
        ``None`` otherwise.
    """
    # partition() never raises, unlike unpacking split('#', 1) on a URL that
    # has no fragment at all.
    repo, _, subsequent = url.partition('#')
    if not subsequent:
        return self.GitRepoUrlParts(
            repo=repo, protocol='head', request='HEAD', extra=None
        )

    if re.match(r'^[a-z]+=', subsequent):
        protocol: Optional[str] = None
        request = ''
        extra: Dict[str, str] = {}
        for key, values in urllib.parse.parse_qs(subsequent).items():
            if key in self._GIT_PROTOCOLS:
                protocol, request = key, values[0]
            else:
                extra[key] = values[-1]
        if not request:
            protocol, request = 'head', 'HEAD'
        return self.GitRepoUrlParts(
            repo=repo, protocol=protocol, request=request, extra=extra
        )

    prefix, separator, remainder = subsequent.partition(':')
    if not separator or not remainder:
        # A bare ref such as "#abcdef" (or a dangling "proto:") names no
        # protocol; the whole fragment is the request.
        return self.GitRepoUrlParts(
            repo=repo, protocol=None, request=subsequent, extra=None
        )
    return self.GitRepoUrlParts(
        repo=repo, protocol=prefix, request=remainder, extra=None
    )


async def resolve_source(self, locator: Locator, version: str) -> ResolvedSource:
    """Look up tarball URL and integrity of ``locator`` at ``version``.

    The package index is fetched from ``self.registry`` and the entry for
    ``version`` is read from its ``versions`` mapping.

    Raises:
        AssertionError: If the index has no ``versions`` key, ``version`` is
            not listed, or its ``dist`` lacks a tarball or any integrity
            information.
    """
    data_url = f'{self.registry}{self.get_locator_url(locator)}'
    # NOTE: Not cachable, because this is an API call.
    raw_data = await Requests.instance.read_all(data_url, cachable=False)
    data = json.loads(raw_data)

    assert 'versions' in data, f'{data_url} returned an invalid package index'

    versions = data['versions']
    assert (
        version in versions
    ), f'{locator.name} versions available are {", ".join(versions)}, not {version}'

    dist = versions[version]['dist']
    assert 'tarball' in dist, f'{locator.name}@{version} has no tarball in dist'

    integrity: Integrity
    if 'integrity' in dist:
        integrity = Integrity.parse(dist['integrity'])
    elif 'shasum' in dist:
        integrity = Integrity.from_sha1(dist['shasum'])
    else:
        # Raised explicitly: ``assert False`` disappears under ``python -O``
        # and would leave ``integrity`` unbound below.
        raise AssertionError(f'{locator.name}@{version} has no integrity in dist')

    return ResolvedSource(resolved=dist['tarball'], integrity=integrity)
def _finalize(self) -> None:
    """Add the Yarn plugin and setup script needed by resolution entries.

    Does nothing unless ``self.has_resolution`` is set. Otherwise two
    sources are added through ``self.gen``:

    * ``flatpak-builder.js`` -- a Yarn plugin registering a ``convertToZip``
      command. The command lists the configured ``cacheFolder``, and for
      every file matching ``<base64 resolution>-<sha>.tgz`` extracts the
      tarball into a ``.zip`` archive named after the slugified locator.
    * ``yarn2-setup.sh`` -- a script that configures Yarn, points its cache
      folder at the mirror directory, imports the plugin and runs
      ``yarn convertToZip``.
    """
    if not self.has_resolution:
        return

    plugin_source = r"""
module.exports = {
  name: `flatpak-builder`,
  factory: require => {
    const { BaseCommand } = require(`@yarnpkg/cli`);
    const { Configuration, structUtils, tgzUtils } = require('@yarnpkg/core')
    const { ZipFS } = require('@yarnpkg/fslib');
    const { getLibzipPromise } = require('@yarnpkg/libzip');
    const fs = require('fs');
    class convertToZipCommand extends BaseCommand {
      static paths = [[`convertToZip`]];
      async execute() {
        const configuration = await Configuration.find(this.context.cwd,
          this.context.plugins);
        //const lockfile = configuration.get('lockfileFilename');
        const cacheFolder = configuration.get('cacheFolder');
        const compressionLevel = configuration.get(`compressionLevel`);
        this.context.stdout.write(`yarn cacheFolder: ${cacheFolder}\n`);
        const convertToZip = async (tgz, target, opts) => {
          const { compressionLevel, ...bufferOpts } = opts;
          const zipFs = new ZipFS(target, {
            create: true,
            libzip: await getLibzipPromise(),
            level: compressionLevel
          });
          const tgzBuffer = fs.readFileSync(tgz);
          await tgzUtils.extractArchiveTo(tgzBuffer, zipFs,
            bufferOpts);
          zipFs.saveAndClose();
        }
        this.context.stdout.write(`converting cache to zip\n`);
        const files = fs.readdirSync(cacheFolder);
        const tasks = []
        for (const i in files) {
          const file = `${files[i]}`;
          const tgzFile = `${cacheFolder}/${file}`;
          const match = file.match(/([^-]+)-(.{1,10}).tgz/);
          if (!match) {
            this.context.stdout.write(`ignore ${file}\n`);
            continue;
          }
          const resolution = Buffer.from(match[1], 'base64').toString();
          const locator = structUtils.parseLocator(resolution, true);
          const sha = match[2];
          const filename =
            `${structUtils.slugifyLocator(locator)}-${sha}.zip`;
          const targetFile = `${cacheFolder}/${filename}`
          tasks.push(async () => {
            return await convertToZip(tgzFile, targetFile, {
              compressionLevel: compressionLevel,
              prefixPath: `node_modules/${structUtils.stringifyIdent(locator)}`,
              stripComponents: 1,
            });
          });
        }
        while (tasks.length) {
          await Promise.all(tasks.splice(0, 128).map(t => t()));
        }
        this.context.stdout.write(`converting finished\n`);
      }
    }
    return {
      commands: [
        convertToZipCommand
      ],
    };
  }
};
"""
    plugin_path = self.gen.data_root / 'flatpak-builder.js'
    self.gen.add_data_source(plugin_source, destination=plugin_path)

    # Run in this order: configuration first, then the plugin import, and
    # only then the command the plugin provides.
    setup_commands = [
        'yarn config set enableTelemetry false',
        'yarn config set enableNetwork false',
        'yarn config set enableGlobalCache false',
        f'yarn config set cacheFolder $FLATPAK_BUILDER_BUILDDIR/{self.mirror_dir}',
        f'yarn plugin import $FLATPAK_BUILDER_BUILDDIR/{plugin_path}',
        'yarn convertToZip',
    ]
    setup_path = self.gen.data_root / 'yarn2-setup.sh'
    self.gen.add_script_source(setup_commands, destination=setup_path)