Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove all dataset-related extras_require and move requirements to pyproject.toml #3078

Merged
merged 24 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion features/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,6 @@ def _install_project_requirements(context):
.splitlines()
)
install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req]
install_reqs.append(".[pandas.CSVDataSet]")
install_reqs.append("kedro-datasets[pandas.CSVDataSet]")
call([context.pip, "install", *install_reqs], env=context.env)
return context
2 changes: 1 addition & 1 deletion features/load_context.feature
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Feature: Custom Kedro project
Background:
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have run a non-interactive kedro new with starter "default"

Scenario: Update the source directory to be nested
When I move the package to "src/nested"
Expand Down
52 changes: 51 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,57 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
dynamic = ["readme", "version", "optional-dependencies"]
dynamic = ["readme", "version"]

[project.optional-dependencies]
test = [
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"blacken-docs==1.9.2",
"black~=22.0",
"coverage[toml]",
"fsspec<2023.9", # Temporary, newer version causing "test_no_versions_with_cloud_protocol" to fail
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to have a follow-up ticket to unpin this again.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I'll create one.

"import-linter[toml]==1.8.0",
"ipython>=7.31.1, <8.0; python_version < '3.8'",
"ipython~=8.10; python_version >= '3.8'",
"Jinja2<3.1.0",
"jupyterlab_server>=2.11.1",
"jupyterlab~=3.0",
"jupyter~=1.0",
"memory_profiler>=0.50.0, <1.0",
"moto==1.3.7; python_version < '3.10'",
"moto==4.1.12; python_version >= '3.10'",
"pandas~=2.0",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyproj~=3.0",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"semver",
"trufflehog~=2.1",
]
docs = [
# docutils>=0.17 changed the HTML
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
"Jinja2<3.1.0",
"kedro-datasets[all]~=1.7.0",
]
all = [ "kedro[test,docs]" ]

[project.urls]
Homepage = "https://kedro.org"
Expand Down
191 changes: 0 additions & 191 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
from glob import glob
from itertools import chain

from setuptools import setup

# at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec
PANDAS = "pandas~=1.3"
SPARK = "pyspark>=2.2, <3.4"
HDFS = "hdfs>=2.5.8, <3.0"
S3FS = "s3fs>=0.3.0, <0.5"

template_files = []
for pattern in ["**/*", "**/.*", "**/.*/**", "**/.*/.**"]:
template_files.extend(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm so looking forward to deleting this too 😄 (in a future PR)

Expand All @@ -18,192 +11,8 @@
]
)


def _collect_requirements(requires):
return sorted(set(chain.from_iterable(requires.values())))


api_require = {"api.APIDataSet": ["requests~=2.20"]}
biosequence_require = {"biosequence.BioSequenceDataSet": ["biopython~=1.73"]}
dask_require = {"dask.ParquetDataSet": ["dask[complete]~=2021.10", "triad>=0.6.7, <1.0"]}
geopandas_require = {
"geopandas.GeoJSONDataSet": ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]
}
matplotlib_require = {"matplotlib.MatplotlibWriter": ["matplotlib>=3.0.3, <4.0"]}
holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews>=1.13.0"]}
networkx_require = {"networkx.NetworkXDataSet": ["networkx~=2.4"]}
pandas_require = {
"pandas.CSVDataSet": [PANDAS],
"pandas.ExcelDataSet": [PANDAS, "openpyxl>=3.0.6, <4.0"],
"pandas.FeatherDataSet": [PANDAS],
"pandas.GBQTableDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
"pandas.GBQQueryDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
"pandas.HDFDataSet": [
PANDAS,
"tables~=3.6.0; platform_system == 'Windows'",
"tables~=3.6; platform_system != 'Windows'",
],
"pandas.JSONDataSet": [PANDAS],
"pandas.ParquetDataSet": [PANDAS, "pyarrow>=1.0, <7.0"],
"pandas.SQLTableDataSet": [PANDAS, "SQLAlchemy~=1.2"],
"pandas.SQLQueryDataSet": [PANDAS, "SQLAlchemy~=1.2"],
"pandas.XMLDataSet": [PANDAS, "lxml~=4.6"],
"pandas.GenericDataSet": [PANDAS],
}
pickle_require = {"pickle.PickleDataSet": ["compress-pickle[lz4]~=2.1.0"]}
pillow_require = {"pillow.ImageDataSet": ["Pillow~=9.0"]}
video_require = {
"video.VideoDataSet": ["opencv-python~=4.5.5.64"]
}
plotly_require = {
"plotly.PlotlyDataSet": [PANDAS, "plotly>=4.8.0, <6.0"],
"plotly.JSONDataSet": ["plotly>=4.8.0, <6.0"],
}
redis_require = {"redis.PickleDataSet": ["redis~=4.1"]}
spark_require = {
"spark.SparkDataSet": [SPARK, HDFS, S3FS],
"spark.SparkHiveDataSet": [SPARK, HDFS, S3FS],
"spark.SparkJDBCDataSet": [SPARK, HDFS, S3FS],
"spark.DeltaTableDataSet": [SPARK, HDFS, S3FS, "delta-spark>=1.0, <3.0"],
}
svmlight_require = {"svmlight.SVMLightDataSet": ["scikit-learn~=1.0.2", "scipy~=1.7.3"]}
tensorflow_required = {
"tensorflow.TensorflowModelDataset": [
# currently only TensorFlow V2 supported for saving and loading.
# V1 requires HDF5 and serialises differently
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
]
}
yaml_require = {"yaml.YAMLDataSet": [PANDAS, "PyYAML>=4.2, <7.0"]}

extras_require = {
"api": _collect_requirements(api_require),
"biosequence": _collect_requirements(biosequence_require),
"dask": _collect_requirements(dask_require),
"docs": [
# docutils>=0.17 changed the HTML
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
"Jinja2<3.1.0",
"kedro-datasets[all]~=1.7.0",
],
"geopandas": _collect_requirements(geopandas_require),
"matplotlib": _collect_requirements(matplotlib_require),
"holoviews": _collect_requirements(holoviews_require),
"networkx": _collect_requirements(networkx_require),
"pandas": _collect_requirements(pandas_require),
"pickle": _collect_requirements(pickle_require),
"pillow": _collect_requirements(pillow_require),
"video": _collect_requirements(video_require),
"plotly": _collect_requirements(plotly_require),
"redis": _collect_requirements(redis_require),
"spark": _collect_requirements(spark_require),
"svmlight": _collect_requirements(svmlight_require),
"tensorflow": _collect_requirements(tensorflow_required),
"yaml": _collect_requirements(yaml_require),
**api_require,
**biosequence_require,
**dask_require,
**geopandas_require,
**matplotlib_require,
**holoviews_require,
**networkx_require,
**pandas_require,
**pickle_require,
**pillow_require,
**video_require,
**plotly_require,
**spark_require,
**svmlight_require,
**tensorflow_required,
**yaml_require,
}

extras_require["all"] = _collect_requirements(extras_require)
extras_require["test"] = [
"adlfs~=2023.1; python_version >= '3.8'",
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"biopython~=1.73",
"blacken-docs==1.9.2",
"black~=22.0",
"compress-pickle[lz4]~=2.1.0",
"coverage[toml]",
"dask[complete]~=2021.10", # pinned by Snyk to avoid a vulnerability
"delta-spark>=1.2.1; python_version >= '3.11'", # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
"delta-spark~=1.2.1; python_version < '3.11'",
"dill~=0.3.1",
"filelock>=3.4.0, <4.0",
"gcsfs>=2023.1, <2023.3; python_version >= '3.8'",
"geopandas>=0.6.0, <1.0",
"hdfs>=2.5.8, <3.0",
"holoviews>=1.13.0",
"import-linter[toml]==1.8.0",
"ipython>=7.31.1, <8.0; python_version < '3.8'",
"ipython~=8.10; python_version >= '3.8'",
"Jinja2<3.1.0",
"joblib>=0.14",
"jupyterlab_server>=2.11.1",
"jupyterlab~=3.0",
"jupyter~=1.0",
"lxml~=4.6",
"matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews
"matplotlib>=3.5, <3.6; python_version >= '3.10'",
"memory_profiler>=0.50.0, <1.0",
"moto==1.3.7; python_version < '3.10'",
"moto==4.1.12; python_version >= '3.10'",
"networkx~=2.4",
"opencv-python~=4.5.5.64",
"openpyxl>=3.0.3, <4.0",
"pandas-gbq>=0.12.0, <0.18.0; python_version < '3.11'",
"pandas-gbq>=0.18.0; python_version >= '3.11'",
"pandas~=1.3 # 1.3 for read_xml/to_xml",
"Pillow~=9.0",
"plotly>=4.8.0, <6.0",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyproj~=3.0",
"pyspark>=2.2, <3.4; python_version < '3.11'",
"pyspark>=3.4; python_version >= '3.11'",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"redis~=4.1",
"requests-mock~=1.6",
"requests~=2.20",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"scikit-learn>=1.0.2,<2",
"scipy>=1.7.3",
"semver",
"SQLAlchemy~=1.2",
"tables~=3.6.0; platform_system == 'Windows' and python_version<'3.8'",
"tables~=3.8.0; platform_system == 'Windows' and python_version>='3.8'", # Import issues with python 3.8 with pytables pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593
"tables~=3.6; platform_system != 'Windows'",
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
"triad>=0.6.7, <1.0",
"trufflehog~=2.1",
"xlsxwriter~=1.0",
]

setup(
package_data={
"kedro": ["py.typed"] + template_files
},
extras_require=extras_require,
)
2 changes: 1 addition & 1 deletion tests/framework/context/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import toml
import yaml
from attrs.exceptions import FrozenInstanceError
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro import __version__ as kedro_version
from kedro.config import ConfigLoader, MissingConfigException
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import pytest
from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import (
AbstractDataset,
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_incremental_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from kedro_datasets.pickle import PickleDataSet
from kedro_datasets.text import TextDataSet
from moto import mock_s3
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import AbstractDataset, DatasetError, IncrementalDataset
from kedro.io.data_catalog import CREDENTIALS_KEY
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_partitioned_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import s3fs
from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
from moto import mock_s3
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import DatasetError, PartitionedDataset
from kedro.io.data_catalog import CREDENTIALS_KEY
Expand Down