"""Parsers for dependency specification files.

Each ``*_parser`` function takes the text of a dependency file and returns a
plain dictionary:

* ``req_parser``  -- requirements.txt content -> ``{"python": ..., "packages": ...}``
* ``yml_parser``  -- environment.yml content  -> ``{"python": ..., "libraries": ...}``
* ``toml_parser`` -- pyproject.toml content   -> ``{"python": ..., "packages": ...}``

The ``parse_*_value`` functions do the actual line-by-line work and fill the
dictionaries passed in by the caller.
"""

# NOTE: this module was reconstructed from a whitespace-collapsed patch series
# (add parsers from extension / add parsers as top-level imports / fix parsers
# dependence on packaging / wip: add toml parser for config). The same series
# also adds the following re-export to ``metaflow/__init__.py``:
#
#     # requirement parsers, top-level import for convenience
#     from .plugins.pypi.parsers import req_parser, yml_parser

import re
from typing import Any, Dict, List, Optional, Tuple

REQ_SPLIT_LINE = re.compile(r"([^~<=>]*)([~<=>]+.*)?")

# Allows things like:
# pkg = <= version
# pkg <= version
# pkg = version
# pkg = ==version or pkg = =version
# In other words, the = is optional but possible
YML_SPLIT_LINE = re.compile(r"(?:=\s)?(<=|>=|~=|==|<|>|=)")


def req_parser(config_value: str) -> Dict[str, Any]:
    """Parse the content of a requirements.txt file.

    Parameters
    ----------
    config_value : str
        Full text of the requirements file.

    Returns
    -------
    Dict[str, Any]
        ``{"packages": {name: version_spec}}`` plus a ``"python"`` entry when
        the file pins a Python version.

    Raises
    ------
    ValueError
        If the file cannot be parsed, or if it uses extra pip arguments,
        non-python (conda) packages, system packages or extra indices.
    """
    extra_args = {}
    sources = {}
    deps = {}
    np_deps = {}
    sys_deps = {}
    python_version = parse_req_value(
        config_value, extra_args, sources, deps, np_deps, sys_deps
    )
    result = {}
    if python_version:
        result["python"] = python_version

    if extra_args:
        raise ValueError(
            "Additional arguments are not supported when parsing requirements.txt for "
            "the pypi decorator -- use Netflix's Metaflow extensions (metaflow-netflixext)'s "
            "named environment instead"
        )
    if np_deps:
        raise ValueError(
            "Non-python dependencies are not supported when parsing requirements.txt for "
            "the pypi decorator -- use Netflix's Metaflow extensions (metaflow-netflixext)'s "
            "named environment instead"
        )
    if sys_deps:
        raise ValueError(
            "System dependencies are not supported when parsing requirements.txt for "
            "the pypi decorator -- use Netflix's Metaflow extensions (metaflow-netflixext)'s "
            "named environment instead"
        )

    if sources:
        raise ValueError(
            "Specifying extra indices is not supported. Include those sources "
            "directly in your pip.conf or use Netflix's Metaflow extensions "
            "(metaflow-netflixext)"
        )

    result["packages"] = deps

    return result


def yml_parser(config_value: str) -> Dict[str, Any]:
    """Parse the content of an environment.yml file.

    Parameters
    ----------
    config_value : str
        Full text of the environment file.

    Returns
    -------
    Dict[str, Any]
        A dictionary with a ``"python"`` entry when a Python version is
        specified and a ``"libraries"`` entry when at least one conda
        dependency is listed.

    Raises
    ------
    ValueError
        If the file cannot be parsed, or if it lists system dependencies,
        channels / extra indices, or pip packages.
    """
    sources = {}
    conda_deps = {}
    pypi_deps = {}
    sys_deps = {}
    python_version = parse_yml_value(
        config_value, {}, sources, conda_deps, pypi_deps, sys_deps
    )
    result = {}
    if sys_deps:
        raise ValueError(
            "System dependencies are not supported when parsing environment.yml for "
            "the conda decorator -- use Netflix's Metaflow extensions (metaflow-netflixext)'s "
            "named environment instead"
        )

    if python_version:
        result["python"] = python_version

    if sources:
        raise ValueError(
            "Channels or extra indices are not supported when parsing environment.yml for "
            "the conda decorator -- use Netflix's Metaflow extensions (metaflow-netflixext) "
            "or specify CONDA_CHANNELS (for channels) and set indices in your pip.conf"
        )
    if pypi_deps:
        raise ValueError(
            "Mixing conda and pypi packages is not supported when parsing environment.yml for "
            "the conda decorator -- use Netflix's Metaflow extensions (metaflow-netflixext) "
            "or stick to only one ecosystem"
        )

    if conda_deps:
        result["libraries"] = conda_deps

    return result


def toml_parser(config_value: str) -> Dict[str, Any]:
    """Parse the content of a pyproject.toml file.

    Parameters
    ----------
    config_value : str
        Full text of the TOML file.

    Returns
    -------
    Dict[str, Any]
        A dictionary with a ``"python"`` entry when a Python version is
        specified and a ``"packages"`` entry when at least one dependency is
        listed.
    """
    pypi_deps = {}
    python_version = parse_toml_value(config_value, pypi_deps)
    result = {}

    if python_version:
        result["python"] = python_version

    if pypi_deps:
        result["packages"] = pypi_deps

    return result


def _parse_requirement(line: str) -> Tuple[str, str]:
    """Parse one requirement string into ``(dependency_name, specifier)``.

    The dependency name includes any extras (``name[a,b]``) and any direct URL
    (``name@url``). A leading ``=``/``==`` is stripped from the specifier.

    Raises
    ------
    ValueError
        If the line is not a valid requirement or carries an environment
        marker.
    """
    # Imported here so that inputs without requirement lines do not need the
    # `packaging` distribution at all.
    from packaging.requirements import InvalidRequirement, Requirement

    try:
        parsed_req = Requirement(line)
    except InvalidRequirement as ex:
        raise ValueError("Could not parse '%s'" % line) from ex
    if parsed_req.marker is not None:
        raise ValueError("Environment markers are not supported for '%s'" % line)
    dep_name = parsed_req.name
    if parsed_req.extras:
        # `extras` is a set; sort it so the resulting key is deterministic.
        dep_name += "[%s]" % ",".join(sorted(parsed_req.extras))
    if parsed_req.url:
        dep_name += "@%s" % parsed_req.url
    specifier = str(parsed_req.specifier).lstrip(" =")
    return dep_name, specifier


def _split_pkg_spec(
    option: str, spec: Optional[str], kind: str
) -> Tuple[str, Optional[str]]:
    """Split the argument of ``--conda-pkg`` / ``--sys-pkg`` into name and version.

    Returns ``(name, version)`` where ``version`` is ``None`` when the
    specification has no version part; spaces are removed and a leading ``=``
    is stripped from the version.

    Raises
    ------
    ValueError
        If the option has no argument or the argument has no package name.
    """
    if not spec:
        raise ValueError("'%s' requires a %s package specification" % (option, kind))
    split_res = REQ_SPLIT_LINE.match(spec)
    name = split_res.group(1).replace(" ", "") if split_res else ""
    if not name:
        raise ValueError("Could not parse %s package '%s'" % (kind, spec))
    version = split_res.group(2)
    if version is not None:
        version = version.replace(" ", "").lstrip("=")
    return name, version


def parse_req_value(
    file_content: str,
    extra_args: Dict[str, List[str]],
    sources: Dict[str, List[str]],
    deps: Dict[str, str],
    np_deps: Dict[str, str],
    sys_deps: Dict[str, str],
) -> Optional[str]:
    """Parse requirements.txt content line by line.

    Parameters
    ----------
    file_content : str
        Full text of the requirements file.
    extra_args : Dict[str, List[str]]
        Filled (under ``"pypi"``) with ``-f``/``--find-links``/
        ``--trusted-host``/``--pre``/``--no-index`` options.
    sources : Dict[str, List[str]]
        Filled with ``--extra-index-url`` values (under ``"pypi"``) and
        ``--conda-channel`` values (under ``"conda"``).
    deps : Dict[str, str]
        Filled with ``name -> version specifier`` for regular requirements.
    np_deps : Dict[str, str]
        Filled with packages given through ``--conda-pkg``.
    sys_deps : Dict[str, str]
        Filled with packages given through ``--sys-pkg``.

    Returns
    -------
    Optional[str]
        The version specifier given for ``python``, if any.

    Raises
    ------
    ValueError
        On unsupported options, unparsable lines, environment markers, or a
        ``--conda-pkg``/``--sys-pkg`` option with a missing or invalid argument.
    """
    python_version = None
    for line in file_content.splitlines():
        line = line.strip()
        if not line:
            continue
        splits = line.split(maxsplit=1)
        first_word = splits[0]
        rem = splits[1] if len(splits) > 1 else None
        if first_word in ("-i", "--index-url"):
            raise ValueError("To specify a base PYPI index, set it in your pip.conf")
        elif first_word == "--extra-index-url" and rem:
            sources.setdefault("pypi", []).append(rem)
        elif first_word in ("-f", "--find-links", "--trusted-host") and rem:
            extra_args.setdefault("pypi", []).append(" ".join([first_word, rem]))
        elif first_word in ("--pre", "--no-index"):
            extra_args.setdefault("pypi", []).append(first_word)
        elif first_word == "--conda-channel" and rem:
            sources.setdefault("conda", []).append(rem)
        elif first_word == "--conda-pkg":
            # Special extension to allow non-python conda package specification
            pkg_name, pkg_version = _split_pkg_spec(first_word, rem, "conda")
            np_deps[pkg_name] = "" if pkg_version is None else pkg_version
        elif first_word == "--sys-pkg":
            # Special extension to allow the specification of system dependencies
            # (currently __cuda and __glibc)
            pkg_name, pkg_version = _split_pkg_spec(first_word, rem, "system")
            if pkg_version is None:
                raise ValueError("System package '%s' requires a version" % pkg_name)
            sys_deps[pkg_name] = pkg_version
        elif first_word.startswith("#"):
            continue
        elif first_word.startswith("-"):
            # Also reached by the options above that need an argument but
            # were given none.
            raise ValueError(
                "'%s' is not a supported line in a requirements.txt" % line
            )
        else:
            dep_name, specifier = _parse_requirement(line)
            if dep_name == "python":
                if specifier:
                    python_version = specifier
            else:
                deps[dep_name] = specifier
    return python_version


def parse_yml_value(
    file_content: str,
    _: Dict[str, List[str]],
    sources: Dict[str, List[str]],
    conda_deps: Dict[str, str],
    pypi_deps: Dict[str, str],
    sys_deps: Dict[str, str],
) -> Optional[str]:
    """Parse environment.yml content line by line.

    This is a line-oriented parser, not a YAML parser: a line that does not
    start with a space or ``-`` selects a section (``channels:``,
    ``dependencies:``, ``pypi-indices:``; anything else is ignored) and the
    following indented / dashed lines are entries of that section. Inside
    ``dependencies:``, ``pip:`` and ``sys:`` switch to pypi and system
    packages respectively.

    Parameters
    ----------
    file_content : str
        Full text of the environment file.
    _ : Dict[str, List[str]]
        Unused.
    sources : Dict[str, List[str]]
        Filled with channels (under ``"conda"``) and pypi indices (under
        ``"pypi"``).
    conda_deps, pypi_deps, sys_deps : Dict[str, str]
        Filled with ``name -> version`` for the respective kind of package.

    Returns
    -------
    Optional[str]
        The version given for ``python``, if any.

    Raises
    ------
    ValueError
        If python is versioned more than once, or a system package has no
        version or a non-exact version.
    """
    python_version = None  # type: Optional[str]
    mode = None
    for line in file_content.splitlines():
        stripped = line.strip()
        # Skip blank lines (including whitespace-only ones) and full-line
        # comments so they neither become packages nor reset the section.
        if not stripped or stripped.startswith("#"):
            continue
        if line[0] not in (" ", "-"):
            if stripped == "channels:":
                mode = "sources"
            elif stripped == "dependencies:":
                mode = "deps"
            elif stripped == "pypi-indices:":
                mode = "pypi_sources"
            else:
                mode = "ignore"
        elif mode and mode.endswith("sources"):
            entry = line.lstrip(" -").rstrip()
            sources.setdefault("conda" if mode == "sources" else "pypi", []).append(
                entry
            )
        elif mode and mode.endswith("deps"):
            entry = line.lstrip(" -").rstrip()
            if entry == "pip:":
                mode = "pypi_deps"
            elif entry == "sys:":
                mode = "sys_deps"
            else:
                if mode == "deps":
                    to_update = conda_deps
                elif mode == "pypi_deps":
                    to_update = pypi_deps
                else:
                    to_update = sys_deps
                splits = YML_SPLIT_LINE.split(entry.replace(" ", ""), maxsplit=1)
                if len(splits) == 1:
                    # No version operator on the line.
                    if splits[0] != "python":
                        if mode == "sys_deps":
                            raise ValueError(
                                "System package '%s' requires a version" % splits[0]
                            )
                        to_update[splits[0]] = ""
                else:
                    dep_name, dep_operator, dep_version = splits
                    if dep_operator not in ("=", "=="):
                        if mode == "sys_deps":
                            raise ValueError(
                                "System package '%s' requires a specific version not '%s'"
                                % (splits[0], dep_operator + dep_version)
                            )
                        # Keep the operator for anything but an exact pin.
                        dep_version = dep_operator + dep_version
                    if dep_name == "python":
                        if dep_version:
                            if python_version:
                                raise ValueError(
                                    "Python versions specified multiple times in "
                                    "the YAML file."
                                )
                            python_version = dep_version
                    else:
                        # NOTE(review): nesting reconstructed from a
                        # whitespace-collapsed source; as written this is only
                        # reached when the entry contained a version operator
                        # -- confirm that is the intent for bare URLs.
                        if dep_name.startswith(("/", "git+", "https://", "ssh://")):
                            # Handle the case where only the URL is specified
                            # without a package name
                            depname_and_maybe_tag = dep_name.split("/")[-1]
                            depname = depname_and_maybe_tag.split("@")[0]
                            if depname.endswith(".git"):
                                depname = depname[:-4]
                            dep_name = "%s@%s" % (depname, dep_name)

                        to_update[dep_name] = dep_version

    return python_version


def parse_toml_value(
    file_content: str,
    pypi_deps: Dict[str, str],
) -> Optional[str]:
    """Parse pyproject.toml content.

    Reads ``project.requires-python`` and ``project.dependencies``.

    Parameters
    ----------
    file_content : str
        Full text of the TOML file.
    pypi_deps : Dict[str, str]
        Filled with ``name -> version specifier`` for every dependency.

    Returns
    -------
    Optional[str]
        ``project.requires-python`` when set; a dependency named ``python``
        with a specifier takes precedence over it.

    Raises
    ------
    Exception
        If neither ``tomllib`` nor ``tomli`` can be imported.
    ValueError
        If a dependency cannot be parsed or carries an environment marker.
    """
    python_version = None
    try:
        import tomllib as toml
    except ImportError:
        try:
            # try to import a backported toml library for python version <3.11
            import tomli as toml
        except ImportError:
            raise Exception(
                "Could not import a TOML library for parsing. "
                "For Python <3.11, please install 'tomli'"
            )

    content = toml.loads(file_content)

    project = content.get("project", {})
    if project.get("requires-python"):
        python_version = project["requires-python"]

    for line in project.get("dependencies") or ():
        dep_name, specifier = _parse_requirement(line)
        if dep_name == "python":
            if specifier:
                python_version = specifier
        else:
            pypi_deps[dep_name] = specifier

    return python_version