From 3c861890d03251276cb3f9b83f15f2bb70ee41ba Mon Sep 17 00:00:00 2001 From: Adrien Barbaresi Date: Mon, 26 Aug 2024 18:19:37 +0200 Subject: [PATCH] setup: use pyproject.toml file and update test workflow (#160) * setup: use pyproject.toml file * update workflow * simplify workflow * review order * remove condition --- .github/workflows/tests.yml | 13 ++--- pyproject.toml | 100 ++++++++++++++++++++++++++++++++++++ setup.py | 100 +----------------------------------- 3 files changed, 106 insertions(+), 107 deletions(-) create mode 100644 pyproject.toml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 26552981..b22eae09 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -68,25 +68,20 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Code format with black + - name: Code format and type checking if: ${{ matrix.python-version == '3.11' }} run: | - python -m pip install --upgrade black + python -m pip install --upgrade .[dev] black --check --diff htmldate + mypy -p htmldate - - name: Install dependencies + - name: Install minimal dependencies run: python -m pip install -e "." - name: Install full dependencies if: ${{ matrix.env.MINIMAL == 'false'}} run: python -m pip install -e ".[all]" - - name: Type checking with mypy - if: ${{ matrix.python-version == '3.11' }} - run: | - python -m pip install --upgrade mypy types-dateparser types-python-dateutil types-lxml types-urllib3 - mypy -p htmldate - - name: Test with pytest run: | python -m pip install --upgrade pytest pytest-cov diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..9a277572 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,100 @@ +# https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "htmldate" +description = "Fast and robust extraction of original and updated publication dates from URLs and web pages." +readme = "README.md" +license = { text = "Apache 2.0" } +dynamic = ["version"] +requires-python = ">=3.8" +authors = [ + {name = "Adrien Barbaresi", email = "barbaresi@bbaw.de"} +] +keywords=[ + "datetime", + "date-parser", + "entity-extraction", + "html-extraction", + "html-parsing", + "metadata-extraction", + "webarchives", + "web-scraping", +] +classifiers = [ + # http://pypi.python.org/pypi?%3Aaction=list_classifiers + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Text Processing :: Linguistic", + "Topic :: Text Processing :: Markup :: HTML", +] +dependencies = [ + "charset_normalizer >= 3.3.2", + "dateparser >= 1.1.2", # 1.1.3+ slower + # see tests on Github Actions + "lxml == 4.9.2 ; platform_system == 'Darwin' and python_version <= '3.8'", + "lxml >= 5.2.2, < 6 ; platform_system != 'Darwin' or python_version > '3.8'", + "python-dateutil >= 2.8.2", + "urllib3 >= 1.26, < 3", +] + +# https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html +[tool.setuptools] +packages = ["htmldate"] + +# https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ +[tool.setuptools.dynamic] +version = {attr = "htmldate.__version__"} + +[project.scripts] +htmldate = "htmldate.cli:main" + +[project.urls] +"Homepage" = "https://htmldate.readthedocs.io" +"Source" = "https://github.com/adbar/htmldate" +"Blog" = "https://adrien.barbaresi.eu/blog/" # /tag/htmldate.html +"Tracker" = "https://github.com/adbar/htmldate/issues" + +# Development extras +[project.optional-dependencies] +dev = [ + "black", + "mypy", + "pytest", + "pytest-cov", + "types-dateparser", + "types-python-dateutil", + "types-lxml", + "types-urllib3", +] +speed = [ + "backports-datetime-fromisoformat; python_version < '3.11'", + "faust-cchardet >= 2.1.19", + "urllib3[brotli]", +] +all = [ + "htmldate[dev]", + "htmldate[speed]", +] + +# [tool.pytest.ini_options] +# testpaths = "tests/*test*.py" diff --git a/setup.py b/setup.py index 7597cdd9..8f91d615 100644 --- a/setup.py +++ b/setup.py @@ -3,40 +3,11 @@ http://github.com/adbar/htmldate """ -import re import sys -from pathlib import Path from setuptools import setup -# some problems with installation solved this way -extras = { - "speed": [ - "backports-datetime-fromisoformat; python_version < '3.11'", - "faust-cchardet >= 2.1.19", - "urllib3[brotli]", - ], -} -extras["all"] = extras["speed"] - - -def get_long_description(): - "Return the README" - with open("README.md", "r", encoding="utf-8") as filehandle: - long_description = filehandle.read() - # long_description += "\n\n" - # with open("CHANGELOG.md", encoding="utf8") as f: - # long_description += f.read() - return long_description - - -def get_version(package): - "Return package version as listed in `__version__` in `init.py`" - initfile = Path(package, "__init__.py").read_text() - return re.search("__version__ = ['\"]([^'\"]+)['\"]", initfile)[1] - - # add argument to compile with mypyc if len(sys.argv) > 1 and sys.argv[1] == "--use-mypyc": sys.argv.pop(1) @@ -48,6 +19,7 @@ def get_version(package): "htmldate/__init__.py", "htmldate/core.py", "htmldate/extractors.py", + "htmldate/meta.py", "htmldate/settings.py", "htmldate/utils.py", "htmldate/validators.py", @@ -60,74 +32,6 @@ def get_version(package): setup( - name="htmldate", - version=get_version("htmldate"), - description="Fast and robust extraction of original and updated publication dates from URLs and web pages.", - long_description=get_long_description(), - long_description_content_type="text/markdown", - classifiers=[ - # As from http://pypi.python.org/pypi?%3Aaction=list_classifiers - "Development Status :: 5 - Production/Stable", - # 'Development Status :: 6 - Mature', - "Environment :: Console", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Internet :: WWW/HTTP", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Text Processing :: Linguistic", - "Topic :: Text Processing :: Markup :: HTML", - ], - keywords=[ - "datetime", - "date-parser", - "entity-extraction", - "html-extraction", - "html-parsing", - "metadata-extraction", - "webarchives", - "web-scraping", - ], - url="https://htmldate.readthedocs.io", - project_urls={ - "Source": "https://github.com/adbar/htmldate", - "Tracker": "https://github.com/adbar/htmldate/issues", - "Blog": "https://adrien.barbaresi.eu/blog/tag/htmldate.html", - }, - author="Adrien Barbaresi", - author_email="barbaresi@bbaw.de", - license="Apache-2.0", - packages=["htmldate"], - include_package_data=True, - python_requires=">=3.8", - install_requires=[ - "charset_normalizer >= 3.3.2", - "dateparser >= 1.1.2", # 1.1.3+ slower - # see tests on Github Actions - "lxml == 4.9.2 ; platform_system == 'Darwin' and python_version <= '3.8'", - "lxml >= 5.2.2, < 6 ; platform_system != 'Darwin' or python_version > '3.8'", - "python-dateutil >= 2.8.2", - "urllib3 >= 1.26, < 3", - ], - extras_require=extras, - entry_points={ - "console_scripts": ["htmldate=htmldate.cli:main"], - }, - # platforms='any', - tests_require=["pytest"], - zip_safe=False, - # optional use of mypyc + # mypyc or not ext_modules=ext_modules, )