diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 1d5aceefb..ef1d2c98f 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -8,8 +8,9 @@ on: pull_request: workflow_dispatch: env: - PIP_UPGRADE: "true" # always upgrade to latest version - PIP_UPGRADE_STRATEGY: "eager" # upgrade all dependencies + PIPX_HOME: "/home/runner/.cache/pipx" + PIPX_BIN_DIR: "/home/runner/.local/bin" + POETRY_VERSION: "1.2.0" jobs: test: runs-on: ubuntu-latest @@ -26,47 +27,51 @@ jobs: sudo apt-get install \ libvoikko1 \ voikko-fi + - name: Load cached Poetry installation and repositories info + uses: actions/cache@v3 + with: + path: | + ~/.cache/pipx/venvs + ~/.local/bin + ~/.cache/pypoetry/cache/repositories + key: poetry-installation-and-repos-${{ matrix.python-version }}-${{ env.POETRY_VERSION }} + - name: Install Poetry + run: | + pipx install poetry==$POETRY_VERSION - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@b55428b1882923874294fa556849718a1d7f2ca5 # v4.2.0 with: python-version: ${{ matrix.python-version }} - cache: pip - cache-dependency-path: setup.py + cache: 'poetry' + cache-dependency-path: 'pyproject.toml' - name: Install Python dependencies run: | - python -m pip install pip setuptools wheel - python -m pip install .[dev] - python -m nltk.downloader punkt # Selectively install the optional dependencies for some Python versions - # Install the optional neural network dependencies (TensorFlow and LMDB) - # - except for one Python version (3.9) so that we can test also without them - if [[ ${{ matrix.python-version }} != '3.9' ]]; then pip install .[nn]; fi - # Install the optional Omikuji and YAKE dependencies - # - except for one Python version (3.9) so that we can test also without them - if [[ ${{ matrix.python-version }} != '3.9' ]]; then pip install .[omikuji,yake]; fi - # Install the optional fastText dependencies for Python 3.9 only - if [[ ${{ 
matrix.python-version }} == '3.9' ]]; then pip install .[fasttext]; fi - # Install the optional spaCy dependencies for Python 3.9 only + # For Python 3.8: + if [[ ${{ matrix.python-version }} == '3.8' ]]; then + poetry install -E "nn omikuji yake voikko pycld3"; + fi + # For Python 3.9: if [[ ${{ matrix.python-version }} == '3.9' ]]; then - pip install .[spacy] + poetry install -E "fasttext spacy"; # download the small English pretrained spaCy model needed by spacy analyzer - python -m spacy download en_core_web_sm --upgrade-strategy only-if-needed + poetry run python -m spacy download en_core_web_sm --upgrade-strategy only-if-needed + fi + # For Python 3.10: + if [[ ${{ matrix.python-version }} == '3.10' ]]; then + poetry install -E "nn omikuji yake"; fi - # For Python 3.8 - # - voikko and pycld3 dependencies - if [[ ${{ matrix.python-version }} == '3.8' ]]; then python -m pip install .[voikko,pycld3]; fi - # Verify installed packages have compatible dependencies: - python -m pip check + poetry run python -m nltk.downloader punkt - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + poetry run flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest --cov=./ --cov-report xml + poetry run pytest --cov=./ --cov-report xml - name: Upload coverage to Codecov uses: codecov/codecov-action@81cd2dc8148241f03f5839d295e000b8f761e378 # v3.1.0 @@ -103,21 +108,30 @@ jobs: if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') steps: - uses: actions/checkout@v3 + - name: Load cached Poetry installation and repositories info + uses: actions/cache@v3 + with: + path: | + ~/.cache/pipx/venvs + ~/.local/bin + ~/.cache/pypoetry/cache/repositories + key: poetry-installation-and-repos-${{ matrix.python-version }}-${{ env.POETRY_VERSION }} + - name: Install Poetry + run: | + pipx install poetry==$POETRY_VERSION - name: Set up Python 3.9 - uses: actions/setup-python@v3 + uses: actions/setup-python@b55428b1882923874294fa556849718a1d7f2ca5 # v4.2.0 with: python-version: '3.9' - cache: pip - cache-dependency-path: setup.py - - name: Build distribution + cache: 'poetry' + cache-dependency-path: 'pyproject.toml' + - name: Build and publish distribution to PyPI + env: + POETRY_HTTP_BASIC_PYPI_USERNAME: __token__ + POETRY_HTTP_BASIC_PYPI_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: | - python -m pip install wheel - python setup.py sdist bdist_wheel - - name: Publish distribution to PyPI - uses: pypa/gh-action-pypi-publish@717ba43cfbb0387f6ce311b169a825772f54d295 # v1.5.0 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + poetry publish --build + - name: Login to Quay.io uses: docker/login-action@49ed152c8eca782a232dede0303416e8f356c37b # v2.0.0 with: diff --git a/.gitignore b/.gitignore index c65a12729..610dbc178 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,6 @@ __pycache__ *.pyc data docs/_build/ -Pipfile.lock +poetry.lock projects.cfg venv/ diff --git a/Dockerfile b/Dockerfile index 1f6849adc..208471603 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,10 @@ -FROM 
python:3.8-slim-bullseye AS builder - +FROM python:3.8-slim-bullseye LABEL maintainer="Juho Inkinen " - SHELL ["/bin/bash", "-c"] -ARG optional_dependencies=dev,voikko,pycld3,fasttext,nn,omikuji,yake,spacy -# Bulding fastText needs some system packages -RUN if [[ $optional_dependencies =~ "fasttext" ]]; then \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential && \ - pip install --upgrade pip setuptools wheel --no-cache-dir && \ - pip install --no-cache-dir \ - fasttext==0.9.2; \ - fi +ARG optional_dependencies="voikko pycld3 fasttext nn omikuji yake spacy" +ARG POETRY_VIRTUALENVS_CREATE=false -FROM python:3.8-slim-bullseye - -SHELL ["/bin/bash", "-c"] -COPY --from=builder /usr/local/lib/python3.8 /usr/local/lib/python3.8 - -ARG optional_dependencies=dev,voikko,pycld3,fasttext,nn,omikuji,yake,spacy # Install system dependencies needed at runtime: RUN apt-get update && \ if [[ $optional_dependencies =~ "voikko" ]]; then \ @@ -33,11 +17,14 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* /usr/include/* WORKDIR /Annif -RUN pip install --upgrade pip wheel --no-cache-dir +RUN pip install --upgrade pip --no-cache-dir && \ + pip install poetry --no-cache-dir + +COPY pyproject.toml setup.cfg README.md LICENSE.txt CITATION.cff projects.cfg.dist /Annif/ -COPY setup.py README.md LICENSE.txt projects.cfg.dist /Annif/ +# First round of installation for Docker layer caching: RUN echo "Installing dependencies for optional features: $optional_dependencies" \ - && pip install .[$optional_dependencies] --no-cache-dir + && poetry install -E "$optional_dependencies" # Download nltk data RUN python -m nltk.downloader punkt -d /usr/share/nltk_data @@ -50,17 +37,19 @@ RUN if [[ $optional_dependencies =~ "spacy" ]]; then \ done; \ fi -# Install Annif by copying source and make the installation editable: +# Second round of installation with the actual code: COPY annif /Annif/annif COPY tests /Annif/tests -RUN pip install -e . 
+RUN poetry install -E "$optional_dependencies" WORKDIR /annif-projects # Switch user to non-root: RUN groupadd -g 998 annif_user && \ useradd -r -u 998 -g annif_user annif_user && \ - chown -R annif_user:annif_user /annif-projects + chmod -R a+rX /Annif && \ + mkdir -p /Annif/tests/data && \ + chown -R annif_user:annif_user /annif-projects /Annif/tests/data USER annif_user CMD annif diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index f9022a3a2..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include annif/swagger/annif.yaml -recursive-include annif/templates * -recursive-include annif/static * diff --git a/README.md b/README.md index 0f72f34bd..db4861fde 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,7 @@ for details. A development version of Annif can be installed by cloning the [GitHub repository](https://github.com/NatLibFi/Annif). +[Poetry](https://python-poetry.org/) is used for managing dependencies and the virtual environment of the development version. ## Installation and setup @@ -61,15 +62,26 @@ Clone the repository. Switch into the repository directory. -Create and activate a virtual environment (optional, but highly recommended): +Install [pipx](https://pypa.github.io/pipx/) and Poetry if you don't have them. First pipx: - python3 -m venv venv - . venv/bin/activate + python3 -m pip install --user pipx + python3 -m pipx ensurepath -Install dependencies (including development) and make the installation editable: +Open a new shell, and then install Poetry: - pip install .[dev] - pip install -e . + pipx install poetry + +Poetry can also be installed without pipx; see the [Poetry documentation](https://python-poetry.org/docs/master/#installation). + +Create a virtual environment and install dependencies: + + poetry install + +By default, development dependencies are included. 
Use option `-E` to install dependencies for selected optional features (`-E "extra1 extra2"` for multiple extras), or install all of them with `--all-extras`. By default the virtual environment directory is not under the project directory, but there is a [setting for selecting this](https://python-poetry.org/docs/configuration/#virtualenvsin-project). + +Enter the virtual environment: + + poetry shell You will also need NLTK data files: @@ -81,7 +93,7 @@ Start up the application: ## Unit tests -Run `. venv/bin/activate` to enter the virtual environment and then run `pytest`. +Run `poetry shell` to enter the virtual environment and then run `pytest`. To have the test suite watch for changes in code and run automatically, use pytest-watch by running `ptw`. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..0f833f4cf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,93 @@ +[tool.poetry] +name = "annif" +version = "0.59.0-dev" +description = "Automated subject indexing and classification tool" +authors = ["National Library of Finland "] +maintainers = [ + "Osma Suominen ", + "Juho Inkinen ", + "Mona Lehtinen ", +] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://annif.org" +repository = "https://github.com/NatLibFi/Annif" +documentation = "https://github.com/NatLibFi/Annif/wiki" +keywords = [ + "machine-learning", + "text-classification", + "rest-api", + "code4lib", + "subject-indexing" +] + +classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent" +] + +[tool.poetry.dependencies] +python = ">=3.8,<3.11" + +connexion = {version = "2.14.*", extras = ["swagger-ui"]} +swagger_ui_bundle = "*" +flask = ">=1.0.4,<3" +flask-cors = "*" +click = "8.1.*" +click-log = "*" +joblib = "1.1.0" +nltk = "*" +gensim = "4.2.*" +scikit-learn = "1.1.1" +scipy = "1.8.*" +rdflib = ">=4.2,<7.0" +gunicorn = "*" +numpy = "1.23.*" +optuna = "2.10.*" +stwfsapy 
= "0.3.*" +python-dateutil = "*" +tomli = "2.0.*" +simplemma = "0.8.*" + +fasttext-wheel = {version = "0.9.2", optional = true} +voikko = {version = "*", optional = true} +tensorflow-cpu = {version = "2.9.1", optional = true} +lmdb = {version = "1.3.0", optional = true} +omikuji = {version = "0.5.*", optional = true} +yake = {version = "0.4.5", optional = true} +pycld3 = {version = "*", optional = true} +spacy = {version = "3.3.*", optional = true} + +[tool.poetry.dev-dependencies] +py = "*" +pytest = "*" +requests = "*" +codecov = "*" +coverage = "<=6.2" +pytest-cov = "*" +pytest-watch = "*" +pytest-flask = "*" +pytest-flake8 = "*" +flake8 = "4.*" +bumpversion = "*" +autopep8 = "*" + +[tool.poetry.extras] +fasttext = ["fasttext-wheel"] +voikko = ["voikko"] +nn = ["tensorflow-cpu", "lmdb"] +omikuji = ["omikuji"] +yake = ["yake"] +pycld3 = ["pycld3"] +spacy = ["spacy"] + +[tool.poetry.scripts] +annif = "annif.cli:cli" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +addopts = "--flake8" diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index f30d24ac0..000000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = --flake8 diff --git a/setup.cfg b/setup.cfg index 7b38ec012..d662dbc5e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,7 @@ serialize = {major}.{minor}.{patch}-{release} {major}.{minor}.{patch} -[bumpversion:file:setup.py] +[bumpversion:file:pyproject.toml] [bumpversion:file:CITATION.cff] diff --git a/setup.py b/setup.py deleted file mode 100644 index 9aee9e564..000000000 --- a/setup.py +++ /dev/null @@ -1,75 +0,0 @@ -import os -from setuptools import setup, find_packages - - -def read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() - - -setup( - name='annif', - version='0.59.0-dev', - url='https://annif.org', - project_urls={ - 'Source': 'https://github.com/NatLibFi/Annif', - 'Documentation': 
'https://github.com/NatLibFi/Annif/wiki', - }, - author='Osma Suominen', - author_email='osma.suominen@helsinki.fi', - description='Automated subject indexing and classification tool', - long_description=read('README.md'), - long_description_content_type='text/markdown', - packages=find_packages(), - include_package_data=True, - zip_safe=False, - python_requires='>=3.8', - install_requires=[ - 'connexion[swagger-ui]==2.14.*', - 'swagger_ui_bundle', - 'flask>=1.0.4,<3', - 'flask-cors', - 'click==8.1.*', - 'click-log', - 'joblib==1.1.0', - 'nltk', - 'gensim==4.2.*', - 'scikit-learn==1.1.1', - 'scipy==1.8.*', - 'rdflib>=4.2,<7.0', - 'gunicorn', - 'numpy==1.23.*', - 'optuna==2.10.*', - 'stwfsapy==0.3.*', - 'python-dateutil', - 'tomli==2.0.*', - 'simplemma==0.8.*' - ], - tests_require=['py', 'pytest', 'requests'], - extras_require={ - 'fasttext': ['fasttext==0.9.2'], - 'voikko': ['voikko'], - 'nn': ['tensorflow-cpu==2.9.1', 'lmdb==1.3.0'], - 'omikuji': ['omikuji==0.5.*'], - 'yake': ['yake==0.4.5'], - 'pycld3': ['pycld3'], - 'spacy': ['spacy==3.3.*'], - 'dev': [ - 'codecov', - 'coverage<=6.2', - 'pytest-cov', - 'pytest-watch', - 'pytest-flask', - 'pytest-flake8', - 'flake8<5', - 'bumpversion', - 'autopep8' - ] - }, - entry_points={ - 'console_scripts': ['annif=annif.cli:cli']}, - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - ] -)