Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise error in spacy package when model name is not a valid python identifier #10192

Merged
merged 13 commits into from
Feb 10, 2022
38 changes: 37 additions & 1 deletion spacy/cli/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from catalogue import RegistryError
import srsly
import sys
import re

from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
from ..schemas import validate, ModelMetaSchema
Expand Down Expand Up @@ -109,6 +110,24 @@ def package(
", ".join(meta["requirements"]),
)
if name is not None:
if not name.isidentifier():
msg.fail(
f"Model name ('{name}') is not a valid python identifier. "
"This is required to correctly package the pipeline so it can be imported as a module.",
"We recommend names that use ASCII A-Z, a-z, _ (underscore), "
"and 0-9 (except as the first character). "
"For specific details see: https://docs.python.org/3/reference/lexical_analysis.html#identifiers",
exits=1,
)
if not _is_permitted_package_name(name):
msg.fail(
f"Model name ('{name}') is not a permitted package name. "
"This is required to correctly install the pipeline after packaging.",
pmbaumgartner marked this conversation as resolved.
Show resolved Hide resolved
"We recommend names that use ASCII A-Z, a-z, _ (underscore), "
"and 0-9, and start with a character. "
"For specific details see: https://www.python.org/dev/peps/pep-0426/#name",
exits=1,
)
meta["name"] = name
if version is not None:
meta["version"] = version
Expand Down Expand Up @@ -162,7 +181,7 @@ def package(
imports="\n".join(f"from . import {m}" for m in imports)
)
create_file(package_path / "__init__.py", init_py)
msg.good(f"Successfully created package '{model_name_v}'", main_path)
msg.good(f"Successfully created package directory '{model_name_v}'", main_path)
if create_sdist:
with util.working_dir(main_path):
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
Expand All @@ -173,6 +192,11 @@ def package(
util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
wheel = main_path / "dist" / f"{model_name_v}{WHEEL_SUFFIX}"
msg.good(f"Successfully created binary wheel", wheel)
if _contains_multiple_underscores(name):
msg.warn(
f"Model name ('{name}') contains multiple sequential underscores. "
"This can cause import issues when loading the model.",
)


def has_wheel() -> bool:
Expand Down Expand Up @@ -422,6 +446,18 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
return md.text


def _is_permitted_package_name(package_name: str) -> bool:
permitted_match = re.search(
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", package_name, re.IGNORECASE
pmbaumgartner marked this conversation as resolved.
Show resolved Hide resolved
)
return permitted_match is not None


def _contains_multiple_underscores(package_name: str) -> bool:
permitted_match = re.search(r"_{2,}", package_name, re.IGNORECASE)
return permitted_match is not None
pmbaumgartner marked this conversation as resolved.
Show resolved Hide resolved


TEMPLATE_SETUP = """
#!/usr/bin/env python
import io
Expand Down
22 changes: 21 additions & 1 deletion spacy/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
from spacy.cli.debug_data import _get_labels_from_spancat
from spacy.cli.download import get_compatibility, get_version
from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
from spacy.cli.package import get_third_party_dependencies
from spacy.cli.package import (
get_third_party_dependencies,
_contains_multiple_underscores,
_is_permitted_package_name,
)
from spacy.cli.validate import get_model_pkgs
from spacy.lang.en import English
from spacy.lang.nl import Dutch
Expand Down Expand Up @@ -692,3 +696,19 @@ def test_get_labels_from_model(factory_name, pipe_name):
assert _get_labels_from_spancat(nlp)[pipe.key] == set(labels)
else:
assert _get_labels_from_model(nlp, factory_name) == set(labels)


def test_permitted_package_names():
# https://www.python.org/dev/peps/pep-0426/#name
assert _is_permitted_package_name("Meine_Bäume") == False
assert _is_permitted_package_name("_package") == False
assert _is_permitted_package_name("package_") == False
assert _is_permitted_package_name(".package") == False
assert _is_permitted_package_name("package.") == False
assert _is_permitted_package_name("-package") == False
assert _is_permitted_package_name("package-") == False


def test_multiple_underscores():
assert _contains_multiple_underscores("my__package") == True
assert _contains_multiple_underscores("my_package") == False