Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a way to get the URL to download a pipeline to the CLI #11175

Merged
merged 23 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64'
types-dataclasses>=0.1.3; python_version < "3.7"
types-mock>=0.1.1
types-requests
types-setuptools>=57.0.0
black>=22.0,<23.0
32 changes: 26 additions & 6 deletions spacy/cli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def download_cli(
ctx: typer.Context,
model: str = Arg(..., help="Name of pipeline package to download"),
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
# fmt: on
):
"""
Expand All @@ -35,7 +35,12 @@ def download_cli(
download(model, direct, sdist, *ctx.args)


def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
def download(
model: str,
direct: bool = False,
sdist: bool = False,
*pip_args,
) -> None:
if (
not (is_package("spacy") or is_package("spacy-nightly"))
and "--no-deps" not in pip_args
Expand All @@ -49,13 +54,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
"dependencies, you'll have to install them manually."
)
pip_args = pip_args + ("--no-deps",)
suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}"
if direct:
components = model.split("-")
model_name = "".join(components[:-1])
version = components[-1]
download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
else:
model_name = model
if model in OLD_MODEL_SHORTCUTS:
Expand All @@ -66,13 +68,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
model_name = OLD_MODEL_SHORTCUTS[model]
compatibility = get_compatibility()
version = get_version(model_name, compatibility)
download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)

filename = get_model_filename(model_name, version, sdist)

download_model(filename, pip_args)
msg.good(
"Download and installation successful",
f"You can now load the package via spacy.load('{model_name}')",
)


def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
    """Build the release-relative file path for a pipeline package.

    model_name (str): Name of the pipeline package.
    version (str): Version of the pipeline package.
    sdist (bool): Use the sdist suffix instead of the wheel suffix. For
        sdists, an "#egg=name==version" fragment is appended as well.
    RETURNS (str): Path of the form "{name}-{version}/{name}-{version}{suffix}".
    """
    release = f"{model_name}-{version}"
    if not sdist:
        return f"{release}/{release}{WHEEL_SUFFIX}"
    # Only the sdist form carries the egg fragment.
    return f"{release}/{release}{SDIST_SUFFIX}#egg={model_name}=={version}"


def get_compatibility() -> dict:
version = get_minor_version(about.__version__)
r = requests.get(about.__compatibility__)
Expand Down Expand Up @@ -101,6 +116,11 @@ def get_version(model: str, comp: dict) -> str:
return comp[model][0]


def get_latest_version(model: str) -> str:
    """Fetch the compatibility table and return the version that
    get_version selects for the given pipeline.

    model (str): Name of the pipeline package.
    RETURNS (str): The version string chosen by get_version.
    """
    return get_version(model, get_compatibility())


def download_model(
filename: str, user_pip_args: Optional[Sequence[str]] = None
) -> None:
Expand Down
58 changes: 56 additions & 2 deletions spacy/cli/info.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from typing import Optional, Dict, Any, Union, List
import platform
import pkg_resources
import json
from pathlib import Path
from wasabi import Printer, MarkdownRenderer
import srsly

from ._util import app, Arg, Opt, string_to_list
from .download import get_model_filename, get_latest_version
from .. import util
from .. import about

Expand All @@ -16,17 +19,27 @@ def info_cli(
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
# fmt: on
):
"""
Print info about spaCy installation. If a pipeline is specified as an argument,
print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues.

Flag --url prints only the download URL of the most recent compatible
version of the pipeline.

DOCS: https://spacy.io/api/cli#info
"""
exclude = string_to_list(exclude)
info(model, markdown=markdown, silent=silent, exclude=exclude)
info(
model,
markdown=markdown,
silent=silent,
exclude=exclude,
url=url,
)


def info(
Expand All @@ -35,11 +48,20 @@ def info(
markdown: bool = False,
silent: bool = True,
exclude: Optional[List[str]] = None,
url: bool = False,
) -> Union[str, dict]:
msg = Printer(no_print=silent, pretty=not silent)
if not exclude:
exclude = []
if model:
if url:
if model is not None:
title = f"Download info for pipeline '{model}'"
data = info_model_url(model)
print(data["download_url"])
svlandeg marked this conversation as resolved.
Show resolved Hide resolved
return data
else:
msg.fail("--url option requires a pipeline name", exits=1)
elif model:
title = f"Info about pipeline '{model}'"
data = info_model(model, silent=silent)
else:
Expand Down Expand Up @@ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
meta["source"] = str(model_path.resolve())
else:
meta["source"] = str(model_path)
download_url = info_installed_model_url(model)
if download_url:
meta["download_url"] = download_url
return {
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
}


def info_installed_model_url(model: str) -> Optional[str]:
    """Look up the URL an installed pipeline package was installed from.

    The URL is read from the "url" entry of the distribution's
    "direct_url.json" metadata, so it is only available for pipelines
    installed as modules that have dist-info available.

    model (str): Name of the installed pipeline package.
    RETURNS (Optional[str]): The recorded URL, or None if it can't be found.
    """
    try:
        distribution = pkg_resources.get_distribution(model)
        direct_url = json.loads(distribution.get_metadata("direct_url.json"))
        url = direct_url["url"]
    except pkg_resources.DistributionNotFound:
        # The package isn't installed.
        return None
    except Exception:
        # Anything else, e.g. missing metadata file, invalid JSON or no "url".
        return None
    return url

def info_model_url(model: str) -> Dict[str, Any]:
    """Return the download URL and the release page URL for the latest
    version of a pipeline.

    model (str): Name of the pipeline package.
    RETURNS (Dict[str, Any]): Dict with the keys "download_url" and
        "release_url".
    """
    version = get_latest_version(model)
    package_path = get_model_filename(model, version)
    return {
        "download_url": "/".join((about.__download_url__, package_path)),
        "release_url": f"https://github.com/explosion/spacy-models/releases/tag/{model}-{version}",
    }


def get_markdown(
data: Dict[str, Any],
title: Optional[str] = None,
Expand Down
1 change: 1 addition & 0 deletions spacy/tests/package/test_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def test_build_dependencies():
"types-dataclasses",
"types-mock",
"types-requests",
"types-setuptools",
]
# ignore language-specific packages that shouldn't be installed by all
libs_ignore_setup = [
Expand Down
17 changes: 9 additions & 8 deletions website/docs/api/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,15 @@ $ python -m spacy info [--markdown] [--silent] [--exclude]
$ python -m spacy info [model] [--markdown] [--silent] [--exclude]
```

| Name | Description |
| ------------------------------------------------ | --------------------------------------------------------------------------------------------- |
| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| **PRINTS** | Information about your spaCy installation. |
| Name | Description |
| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
| `--url`, `-u` <Tag variant="new">3.5.0</Tag> | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ |
polm marked this conversation as resolved.
Show resolved Hide resolved
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| **PRINTS** | Information about your spaCy installation. |

## validate {#validate new="2" tag="command"}

Expand Down
25 changes: 20 additions & 5 deletions website/docs/usage/models.md
Original file line number Diff line number Diff line change
Expand Up @@ -365,15 +365,30 @@ pipeline package can be found.
To download a trained pipeline directly using
[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
path of the wheel file or archive. Installing the wheel is usually more
efficient. To find the direct link to a package, head over to the
[releases](https://github.com/explosion/spacy-models/releases), right click on
the archive link and copy it to your clipboard.
efficient.

> #### How to Find Download URLs {#download-urls}
polm marked this conversation as resolved.
Show resolved Hide resolved
>
> Pretrained pipeline distributions are hosted on
> [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you
> can find download links there, as well as on the model page. You can also get download URLs directly
polm marked this conversation as resolved.
Show resolved Hide resolved
> from the command line by using `spacy info` with the `--url` flag, which may be useful for automation.
>
> ```bash
> spacy info en_core_web_sm --url
> ```
>
> This command will print the download URL for the latest version of a pipeline
polm marked this conversation as resolved.
Show resolved Hide resolved
> compatible with the version of spaCy you're using. Note that an Internet connection is required to look up the compatibility information.
polm marked this conversation as resolved.
Show resolved Hide resolved

```bash
# With external URL
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz

# Using spacy info to get the external URL
$ pip install $(spacy info en_core_web_sm --url)

# With local file
$ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
$ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
Expand Down Expand Up @@ -515,8 +530,8 @@ Because pipeline packages are valid Python packages, you can add them to your
application's `requirements.txt`. If you're running your own internal PyPi
installation, you can upload the pipeline packages there. pip's
[requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
polm marked this conversation as resolved.
Show resolved Hide resolved
supports both package names to download via a PyPi server, as well as direct
URLs.
supports both package names to download via a PyPi server, as well as [direct
URLs](#download-urls).
polm marked this conversation as resolved.
Show resolved Hide resolved

```text
### requirements.txt
Expand Down
10 changes: 10 additions & 0 deletions website/src/templates/models.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const MODEL_META = {
benchmark_ner: 'NER accuracy',
benchmark_speed: 'Speed',
compat: 'Latest compatible package version for your spaCy installation',
download_url: 'Download URL for the pipeline',
}

const LABEL_SCHEME_META = {
Expand Down Expand Up @@ -134,6 +135,13 @@ function formatAccuracy(data, lang) {
.filter(item => item)
}

function formatDownloadUrl(lang, name, version) {
const fullName = `${lang}_${name}-${version}`
const filename = `${fullName}-py3-none-any.whl`
const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}`
return <Link to={url} hideIcon>{filename}</Link>
polm marked this conversation as resolved.
Show resolved Hide resolved
}

function formatModelMeta(data) {
return {
fullName: `${data.lang}_${data.name}-${data.version}`,
Expand All @@ -150,6 +158,7 @@ function formatModelMeta(data) {
labels: isEmptyObj(data.labels) ? null : data.labels,
vectors: formatVectors(data.vectors),
accuracy: formatAccuracy(data.performance, data.lang),
download_url: formatDownloadUrl(data.lang, data.name, data.version),
}
}

Expand Down Expand Up @@ -240,6 +249,7 @@ const Model = ({
{ label: 'Components', content: components, help: MODEL_META.components },
{ label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
{ label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
{ label: 'Download URL', content: meta.download_url, help: MODEL_META.download_url },
polm marked this conversation as resolved.
Show resolved Hide resolved
{ label: 'Sources', content: sources, help: MODEL_META.sources },
{ label: 'Author', content: author },
{ label: 'License', content: license },
Expand Down