Remove all datasets-related extras_require and move requirements to pyproject.toml #3078

Merged · 24 commits · Oct 3, 2023
1 change: 1 addition & 0 deletions features/ipython.feature
@@ -3,6 +3,7 @@ Feature: IPython target in new project
Scenario: Execute ipython target
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have installed the project dependencies
When I execute the kedro command "ipython"
Then I should get a message including "An enhanced Interactive Python"
And I should get a message including "Kedro project project-dummy"
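
The new step `And I have installed the project dependencies` now appears in every scenario that actually runs the project, since the starter's requirements are no longer pulled in through kedro's own extras. A plausible behave step definition is sketched below; the module path and the `context` attribute name are assumptions for illustration, not the PR's actual implementation.

```python
# features/steps/install_steps.py (hypothetical location) — sketch of the new step.
import subprocess
import sys

from behave import given


@given("I have installed the project dependencies")
def install_project_dependencies(context):
    # Assumes the earlier "kedro new" step stored the generated project
    # directory on the context; the real attribute name may differ.
    requirements = context.root_project_dir / "src" / "requirements.txt"
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", str(requirements)],
        check=True,
    )
```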
3 changes: 2 additions & 1 deletion features/load_context.feature
@@ -1,7 +1,8 @@
Feature: Custom Kedro project
Background:
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have run a non-interactive kedro new with starter "default"
And I have installed the project dependencies

Scenario: Update the source directory to be nested
When I move the package to "src/nested"
6 changes: 6 additions & 0 deletions features/run.feature
@@ -7,13 +7,15 @@ Feature: Run Project

Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have installed the project dependencies
When I execute the kedro command "run"
Then I should get a successful exit code
And the logs should show that 4 nodes were run

Scenario: Run parallel runner with default python entry point with example code
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have installed the project dependencies
When I execute the kedro command "run --runner=ParallelRunner"
Then I should get a successful exit code
And the logs should show that "split_data" was run
@@ -32,13 +34,15 @@ Feature: Run Project
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have prepared a run_config file with config options
And I have installed the project dependencies
When I execute the kedro command "run --config run_config.yml"
Then I should get a successful exit code
And the logs should show that 1 nodes were run

Scenario: Run kedro run with config from archive and OmegaConfigLoader
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have installed the project dependencies
And I have set the OmegaConfigLoader in settings
When I execute the kedro command "package"
Then I should get a successful exit code
@@ -50,13 +54,15 @@ Feature: Run Project
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have prepared a run_config file with config options
And I have installed the project dependencies
When I execute the kedro command "run --config run_config.yml --pipeline __default__"
Then I should get a successful exit code
And the logs should show that 4 nodes were run

Scenario: Run kedro run with extra parameters
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have installed the project dependencies
When I execute the kedro command "run --params extra1:1,extra2:value2"
Then I should get a successful exit code
And the logs should show that 4 nodes were run
51 changes: 50 additions & 1 deletion pyproject.toml
@@ -54,7 +54,56 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
dynamic = ["readme", "version", "optional-dependencies"]
dynamic = ["readme", "version"]

[project.optional-dependencies]
test = [
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"blacken-docs==1.9.2",
"black~=22.0",
"coverage[toml]",
"import-linter[toml]==1.8.0",
"ipython>=7.31.1, <8.0; python_version < '3.8'",
"ipython~=8.10; python_version >= '3.8'",
"Jinja2<3.1.0",
"jupyterlab_server>=2.11.1, <2.16.0", # 2.16.0 requires importlib_metedata >= 4.8.3 which conflicts with flake8 requirement
"jupyterlab~=3.0, <3.6.0", # 3.6.0 requires jupyterlab_server~=2.19
"jupyter~=1.0",
"memory_profiler>=0.50.0, <1.0",
"moto==1.3.7; python_version < '3.10'",
"moto==4.1.12; python_version >= '3.10'",
"pandas",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyproj~=3.0",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"semver",
"trufflehog~=2.1",
]
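
A note on the pin style used throughout these lists: `~=` is PEP 440's compatible-release operator, so `pytest~=7.2` means `>=7.2, ==7.*`. A quick sketch with the `packaging` library (an illustration, not part of the PR):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# "pytest~=7.2" admits minor/patch releases of the 7.x line but not 8.0.
spec = SpecifierSet("~=7.2")
print(Version("7.4.1") in spec)  # True
print(Version("8.0.0") in spec)  # False
```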
docs = [
# docutils>=0.17 changed the HTML
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
"Jinja2<3.1.0",
"kedro-datasets[all]~=1.7.0",
]
all = [ "kedro[test,docs]" ]

[project.urls]
Homepage = "https://kedro.org"
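
Because `optional-dependencies` is dropped from `dynamic` (see the change above) and declared statically, the extras become ordinary package metadata, and the old computed `all` group is replaced by the self-referential `all = ["kedro[test,docs]"]`. A small sketch to inspect the result, assuming a build of kedro from this branch is installed:

```python
from importlib.metadata import metadata, requires

meta = metadata("kedro")
print(meta.get_all("Provides-Extra"))  # expect something like ['all', 'docs', 'test']

# The self-referential extra surfaces as a requirement guarded by a marker:
for dep in requires("kedro") or []:
    if 'extra == "all"' in dep:
        print(dep)  # e.g. 'kedro[docs,test]; extra == "all"'
```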
192 changes: 0 additions & 192 deletions setup.py
@@ -1,14 +1,7 @@
from glob import glob
from itertools import chain

from setuptools import setup

# at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec
PANDAS = "pandas~=1.3"
SPARK = "pyspark>=2.2, <3.4"
HDFS = "hdfs>=2.5.8, <3.0"
S3FS = "s3fs>=0.3.0, <0.5"

template_files = []
for pattern in ["**/*", "**/.*", "**/.*/**", "**/.*/.**"]:
template_files.extend(
Member comment: I'm so looking forward to deleting this too 😄 (in a future PR)

@@ -19,193 +12,8 @@
)


def _collect_requirements(requires):
return sorted(set(chain.from_iterable(requires.values())))
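
For reference, this deleted helper flattened each `{dataset: [requirements, ...]}` mapping into one sorted, de-duplicated list. A standalone rerun of the same logic, using one of the mappings from this file:

```python
from itertools import chain


def _collect_requirements(requires):
    # Flatten the per-dataset requirement lists, drop duplicates, sort.
    return sorted(set(chain.from_iterable(requires.values())))


plotly_require = {
    "plotly.PlotlyDataSet": ["pandas~=1.3", "plotly>=4.8.0, <6.0"],
    "plotly.JSONDataSet": ["plotly>=4.8.0, <6.0"],
}
print(_collect_requirements(plotly_require))
# ['pandas~=1.3', 'plotly>=4.8.0, <6.0']
```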


api_require = {"api.APIDataSet": ["requests~=2.20"]}
biosequence_require = {"biosequence.BioSequenceDataSet": ["biopython~=1.73"]}
dask_require = {"dask.ParquetDataSet": ["dask[complete]~=2021.10", "triad>=0.6.7, <1.0"]}
geopandas_require = {
"geopandas.GeoJSONDataSet": ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]
}
matplotlib_require = {"matplotlib.MatplotlibWriter": ["matplotlib>=3.0.3, <4.0"]}
holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews>=1.13.0"]}
networkx_require = {"networkx.NetworkXDataSet": ["networkx~=2.4"]}
pandas_require = {
"pandas.CSVDataSet": [PANDAS],
"pandas.ExcelDataSet": [PANDAS, "openpyxl>=3.0.6, <4.0"],
"pandas.FeatherDataSet": [PANDAS],
"pandas.GBQTableDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
"pandas.GBQQueryDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
"pandas.HDFDataSet": [
PANDAS,
"tables~=3.6.0; platform_system == 'Windows'",
"tables~=3.6; platform_system != 'Windows'",
],
"pandas.JSONDataSet": [PANDAS],
"pandas.ParquetDataSet": [PANDAS, "pyarrow>=1.0, <7.0"],
"pandas.SQLTableDataSet": [PANDAS, "SQLAlchemy~=1.2"],
"pandas.SQLQueryDataSet": [PANDAS, "SQLAlchemy~=1.2"],
"pandas.XMLDataSet": [PANDAS, "lxml~=4.6"],
"pandas.GenericDataSet": [PANDAS],
}
pickle_require = {"pickle.PickleDataSet": ["compress-pickle[lz4]~=2.1.0"]}
pillow_require = {"pillow.ImageDataSet": ["Pillow~=9.0"]}
video_require = {
"video.VideoDataSet": ["opencv-python~=4.5.5.64"]
}
plotly_require = {
"plotly.PlotlyDataSet": [PANDAS, "plotly>=4.8.0, <6.0"],
"plotly.JSONDataSet": ["plotly>=4.8.0, <6.0"],
}
redis_require = {"redis.PickleDataSet": ["redis~=4.1"]}
spark_require = {
"spark.SparkDataSet": [SPARK, HDFS, S3FS],
"spark.SparkHiveDataSet": [SPARK, HDFS, S3FS],
"spark.SparkJDBCDataSet": [SPARK, HDFS, S3FS],
"spark.DeltaTableDataSet": [SPARK, HDFS, S3FS, "delta-spark>=1.0, <3.0"],
}
svmlight_require = {"svmlight.SVMLightDataSet": ["scikit-learn~=1.0.2", "scipy~=1.7.3"]}
tensorflow_required = {
"tensorflow.TensorflowModelDataset": [
# currently only TensorFlow V2 supported for saving and loading.
# V1 requires HDF5 and serialises differently
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
]
}
yaml_require = {"yaml.YAMLDataSet": [PANDAS, "PyYAML>=4.2, <7.0"]}

extras_require = {
"api": _collect_requirements(api_require),
"biosequence": _collect_requirements(biosequence_require),
"dask": _collect_requirements(dask_require),
"docs": [
# docutils>=0.17 changed the HTML
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
"Jinja2<3.1.0",
"kedro-datasets[all]~=1.7.0",
],
"geopandas": _collect_requirements(geopandas_require),
"matplotlib": _collect_requirements(matplotlib_require),
"holoviews": _collect_requirements(holoviews_require),
"networkx": _collect_requirements(networkx_require),
"pandas": _collect_requirements(pandas_require),
"pickle": _collect_requirements(pickle_require),
"pillow": _collect_requirements(pillow_require),
"video": _collect_requirements(video_require),
"plotly": _collect_requirements(plotly_require),
"redis": _collect_requirements(redis_require),
"spark": _collect_requirements(spark_require),
"svmlight": _collect_requirements(svmlight_require),
"tensorflow": _collect_requirements(tensorflow_required),
"yaml": _collect_requirements(yaml_require),
**api_require,
**biosequence_require,
**dask_require,
**geopandas_require,
**matplotlib_require,
**holoviews_require,
**networkx_require,
**pandas_require,
**pickle_require,
**pillow_require,
**video_require,
**plotly_require,
**spark_require,
**svmlight_require,
**tensorflow_required,
**yaml_require,
}

extras_require["all"] = _collect_requirements(extras_require)
extras_require["test"] = [
"adlfs>=2021.7.1, <=2022.2; python_version == '3.7'",
"adlfs~=2023.1; python_version >= '3.8'",
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"biopython~=1.73",
"blacken-docs==1.9.2",
"black~=22.0",
"compress-pickle[lz4]~=2.1.0",
"coverage[toml]",
"dask[complete]~=2021.10", # pinned by Snyk to avoid a vulnerability
"delta-spark>=1.2.1; python_version >= '3.11'", # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
"delta-spark~=1.2.1; python_version < '3.11'",
"dill~=0.3.1",
"filelock>=3.4.0, <4.0",
"gcsfs>=2021.4, <=2023.1; python_version == '3.7'",
"gcsfs>=2023.1, <2023.3; python_version >= '3.8'",
"geopandas>=0.6.0, <1.0",
"hdfs>=2.5.8, <3.0",
"holoviews>=1.13.0",
"import-linter[toml]==1.8.0",
"ipython>=7.31.1, <8.0; python_version < '3.8'",
"ipython~=8.10; python_version >= '3.8'",
"Jinja2<3.1.0",
"joblib>=0.14",
"jupyterlab_server>=2.11.1",
"jupyterlab~=3.0",
"jupyter~=1.0",
"lxml~=4.6",
"matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews
"matplotlib>=3.5, <3.6; python_version >= '3.10'",
"memory_profiler>=0.50.0, <1.0",
"moto==1.3.7; python_version < '3.10'",
"moto==4.1.12; python_version >= '3.10'",
"networkx~=2.4",
"opencv-python~=4.5.5.64",
"openpyxl>=3.0.3, <4.0",
"pandas-gbq>=0.12.0, <0.18.0; python_version < '3.11'",
"pandas-gbq>=0.18.0; python_version >= '3.11'",
"pandas~=1.3 # 1.3 for read_xml/to_xml",
"Pillow~=9.0",
"plotly>=4.8.0, <6.0",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyproj~=3.0",
"pyspark>=2.2, <3.4; python_version < '3.11'",
"pyspark>=3.4; python_version >= '3.11'",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"redis~=4.1",
"requests-mock~=1.6",
"requests~=2.20",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"scikit-learn>=1.0.2,<2",
"scipy>=1.7.3",
"semver",
"SQLAlchemy~=1.2",
"tables~=3.6.0; platform_system == 'Windows' and python_version<'3.8'",
"tables~=3.8.0; platform_system == 'Windows' and python_version>='3.8'", # Import issues with python 3.8 with pytables pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593
"tables~=3.6; platform_system != 'Windows'",
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
"triad>=0.6.7, <1.0",
"trufflehog~=2.1",
"xlsxwriter~=1.0",
]
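
Many of the entries above are guarded by PEP 508 environment markers (`python_version`, `platform_system`, `platform_machine`), which installers evaluate against the running interpreter. A minimal sketch of that evaluation with the `packaging` library (illustrative only):

```python
from packaging.requirements import Requirement

req = Requirement("moto==4.1.12; python_version >= '3.10'")
print(req.name, req.specifier)  # moto ==4.1.12
print(req.marker.evaluate())    # True on Python 3.10+, False on older interpreters
```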

setup(
package_data={
"kedro": ["py.typed"] + template_files
},
extras_require=extras_require,
)
2 changes: 1 addition & 1 deletion tests/framework/context/test_context.py
@@ -13,7 +13,7 @@
import toml
import yaml
from attrs.exceptions import FrozenInstanceError
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro import __version__ as kedro_version
from kedro.config import ConfigLoader, MissingConfigException
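
This import fix (repeated in three more test files below) is needed because `pandas.util.testing` was deprecated and later removed; `pandas.testing` is the public home of the same helpers. A quick self-contained check:

```python
import pandas as pd
from pandas.testing import assert_frame_equal

left = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
right = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
assert_frame_equal(left, right)  # silent on success, raises AssertionError otherwise
```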
6 changes: 4 additions & 2 deletions tests/io/test_data_catalog.py
@@ -8,7 +8,7 @@
import pandas as pd
import pytest
from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import (
AbstractDataset,
@@ -762,16 +762,18 @@ def test_replacing_nonword_characters(self):
assert "ds3__csv" in catalog.datasets.__dict__
assert "jalapeño" in catalog.datasets.__dict__

def test_no_versions_with_cloud_protocol(self, monkeypatch):
def test_no_versions_with_cloud_protocol(self, monkeypatch, mocker):
"""Check the error if no versions are available for load from cloud storage"""
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "dummmy")
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "dummmy")
version = Version(load=None, save=None)
mocker.patch("kedro_datasets.pandas.csv_dataset.fsspec")
versioned_dataset = CSVDataSet("s3://bucket/file.csv", version=version)
pattern = re.escape(
f"Did not find any versions for {versioned_dataset}. "
f"This could be due to insufficient permission."
)

with pytest.raises(DatasetError, match=pattern):
versioned_dataset.load()
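
The added `mocker.patch("kedro_datasets.pandas.csv_dataset.fsspec")` keeps `CSVDataSet` from building a real S3 filesystem, since the test only cares about the version-resolution error message. The key idea is patching the module where it is used, not where it is defined; a self-contained sketch of the same pattern (pytest with pytest-mock, hypothetical names):

```python
import fsspec  # patched below at its use site in *this* module


def list_bucket(path):
    fs = fsspec.filesystem("s3")
    return fs.ls(path)


def test_list_bucket_without_network(mocker):
    # Patch the name `fsspec` as seen by this module, mirroring the PR's
    # patch of `kedro_datasets.pandas.csv_dataset.fsspec`.
    fake_fs = mocker.patch(f"{__name__}.fsspec").filesystem.return_value
    fake_fs.ls.return_value = ["bucket/file.csv"]
    assert list_bucket("bucket") == ["bucket/file.csv"]
```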

2 changes: 1 addition & 1 deletion tests/io/test_incremental_dataset.py
@@ -11,7 +11,7 @@
from kedro_datasets.pickle import PickleDataSet
from kedro_datasets.text import TextDataSet
from moto import mock_s3
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import AbstractDataset, DatasetError, IncrementalDataset
from kedro.io.data_catalog import CREDENTIALS_KEY
2 changes: 1 addition & 1 deletion tests/io/test_partitioned_dataset.py
@@ -9,7 +9,7 @@
import s3fs
from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
from moto import mock_s3
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import DatasetError, PartitionedDataset
from kedro.io.data_catalog import CREDENTIALS_KEY