Merge branch 'main' into ss/add-dependencies-support

neptune-ai · Jan 26, 2024 · ac1c7a6
2 parents daeb146 + 04a3f03
commit ac1c7a6
Show file tree

Hide file tree

Showing 9 changed files with 44 additions and 38 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -7,7 +7,7 @@ jobs:
       max-parallel: 4
       matrix:
         os: [ubuntu-latest, macos-latest]
-        python-version: [3.8, 3.9, "3.10", "3.11"]
+        python-version: [3.9, "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v3
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,9 @@
 ### Features
 - Added [`dependencies`](https://docs.neptune.ai/logging/dependencies/) support ([#80](https://github.com/neptune-ai/kedro-neptune/pull/80))
 
+### Fixes
+- Replaced `*DataSet` and `data_set` with `*Dataset` and `dataset` respectively ([#79](https://github.com/neptune-ai/kedro-neptune/pull/79))
+
 ## 0.3.0
 
 ### Features

diff --git a/examples/planets/conf/base/catalog_neptune.yml b/examples/planets/conf/base/catalog_neptune.yml
@@ -1,21 +1,21 @@
-# You can log files to Neptune via NeptuneFileDataSet
+# You can log files to Neptune via NeptuneFileDataset
 #
 # example_artifact:
-#   type: kedro_neptune.NeptuneFileDataSet
+#   type: kedro_neptune.NeptuneFileDataset
 #   filepath: data/06_models/clf_model.pkl
 #
-# If you want to log existing Kedro Dataset to Neptune add @neptune to the DataSet name
+# If you want to log existing Kedro Dataset to Neptune add @neptune to the Dataset name
 #
 # example_iris_data@neptune:
-#   type: kedro_neptune.NeptuneFileDataSet
+#   type: kedro_neptune.NeptuneFileDataset
 #   filepath: data/01_raw/iris.csv
 #
-# You can use kedro_neptune.NeptuneFileDataSet in any catalog including conf/base/catalog.yml
+# You can use kedro_neptune.NeptuneFileDataset in any catalog including conf/base/catalog.yml
 #
 planets@neptune:
-  type: kedro_neptune.NeptuneFileDataSet
+  type: kedro_neptune.NeptuneFileDataset
   filepath: data/planets/planets.csv
 
 logo:
-  type: kedro_neptune.NeptuneFileDataSet
+  type: kedro_neptune.NeptuneFileDataset
   filepath: data/planets/logo.png
diff --git a/examples/planets/pyproject.toml b/examples/planets/pyproject.toml
@@ -1,4 +1,4 @@
 [tool.kedro]
 package_name = "planets"
 project_name = "planets"
-kedro_init_version = "0.18.5"
+kedro_init_version = "0.19.1"
diff --git a/examples/planets/src/planets/settings.py b/examples/planets/src/planets/settings.py
@@ -35,8 +35,8 @@
     "config_patterns": {
         #   "spark" : ["spark*/"],
         #   "parameters": ["parameters*", "parameters*/**", "**/parameters*"],
-        "credentials_neptune": ["credentials_neptune*"],
-        "neptune": ["neptune*"],
+        "credentials_neptune": ["*/credentials_neptune*"],
+        "neptune": ["*/neptune*"],
     }
 }
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -9,14 +9,14 @@ style = "semver"
 pattern = "default-unprefixed"
 
 [tool.poetry.dependencies]
-python = "^3.7"
+python = "^3.9"
 
 # Backports of functionality missing from older Python versions
 importlib-metadata = { version = "*", python = "<3.8" }
 
 # Base requirements
-kedro = ">=0.18.5"
-kedro-datasets = ">=1.8.0"
+kedro = ">=0.19.0"
+kedro-datasets = ">=2.0.0"
 "ruamel.yaml" = "^0.17.0"
 
 # dev

diff --git a/src/kedro_neptune/__init__.py b/src/kedro_neptune/__init__.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-__all__ = ["NeptuneRunDataSet", "NeptuneFileDataSet", "neptune_hooks", "init", "__version__"]
+__all__ = ["NeptuneRunDataset", "NeptuneFileDataset", "neptune_hooks", "init", "__version__"]
 
 import hashlib
 import json
@@ -35,7 +35,7 @@
 from kedro.framework.startup import ProjectMetadata
 from kedro.io import (
     DataCatalog,
-    MemoryDataSet,
+    MemoryDataset,
 )
 from kedro.io.core import (
     AbstractDataset,
@@ -98,19 +98,19 @@ def neptune_commands():
 """
 
 INITIAL_NEPTUNE_CATALOG = """\
-# You can log files to Neptune via NeptuneFileDataSet
+# You can log files to Neptune via NeptuneFileDataset
 #
 # example_artifact:
-#   type: kedro_neptune.NeptuneFileDataSet
+#   type: kedro_neptune.NeptuneFileDataset
 #   filepath: data/06_models/clf_model.pkl
 #
-# If you want to log existing Kedro Dataset to Neptune add @neptune to the DataSet name
+# If you want to log existing Kedro Dataset to Neptune add @neptune to the Dataset name
 #
 # example_iris_data@neptune:
-#   type: kedro_neptune.NeptuneFileDataSet
+#   type: kedro_neptune.NeptuneFileDataset
 #   filepath: data/01_raw/iris.csv
 #
-# You can use kedro_neptune.NeptuneFileDataSet in any catalog including conf/base/catalog.yml
+# You can use kedro_neptune.NeptuneFileDataset in any catalog including conf/base/catalog.yml
 #
 """
 
@@ -148,7 +148,7 @@ def init(
     After initializing it, whenever you run '$ kedro run', you will log:
     * parameters
     * pipeline execution configuration (run_params)
-    * metadata about Kedro DataSets
+    * metadata about Kedro Datasets
     * hardware consumption and node execution times
     * configuration files from the conf/base directory
     * full Kedro run command
@@ -226,7 +226,7 @@ def _connection_mode(enabled: bool) -> str:
     return "async" if enabled else "debug"
 
 
-class NeptuneRunDataSet(AbstractDataset):
+class NeptuneRunDataset(AbstractDataset):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._run: Optional[neptune.Run] = None
@@ -287,7 +287,7 @@ def _release(self) -> None:
             self._loaded = False
 
 
-class BinaryFileDataSet(TextDataset):
+class BinaryFileDataset(TextDataset):
     def __init__(
         self,
         filepath: str,
@@ -318,11 +318,11 @@ def _load(self) -> bytes:
             return fs_file.read()
 
 
-class NeptuneFileDataSet(BinaryFileDataSet):
-    """NeptuneFileDataSet is a Kedro DataSet that lets you log files to Neptune.
+class NeptuneFileDataset(BinaryFileDataset):
+    """NeptuneFileDataset is a Kedro Dataset that lets you log files to Neptune.
 
     It can be any file on the POSIX compatible filesystem.
-    To log it, you need to define the NeptuneFileDataSet in any Kedro catalog, including catalog.yml.
+    To log it, you need to define the NeptuneFileDataset in any Kedro catalog, including catalog.yml.
 
     Args:
         filepath: Filepath in POSIX format to a text file prefixed with a protocol like s3://.
@@ -336,17 +336,17 @@ class NeptuneFileDataSet(BinaryFileDataSet):
         Log a file to Neptune from any Kedro catalog YML file:
 
             example_model_file:
-                type: kedro_neptune.NeptuneFileDataSet
+                type: kedro_neptune.NeptuneFileDataset
                 filepath: data/06_models/clf.pkl
 
-        Log a file to Neptune that has already been defined as a Kedro DataSet in any catalog YML file:
+        Log a file to Neptune that has already been defined as a Kedro Dataset in any catalog YML file:
 
             example_iris_data:
                 type: pandas.CSVDataset
                 filepath: data/01_raw/iris.csv
 
             example_iris_data@neptune:
-                type: kedro_neptune.NeptuneFileDataSet
+                type: kedro_neptune.NeptuneFileDataset
                 filepath: data/01_raw/iris.csv
 
     For details, see the documentation:
@@ -363,7 +363,7 @@ def __init__(
         super().__init__(filepath=filepath, version=None, credentials=credentials, fs_args=fs_args)
 
 
-def log_file_dataset(namespace: Handler, name: str, dataset: NeptuneFileDataSet):
+def log_file_dataset(namespace: Handler, name: str, dataset: NeptuneFileDataset):
     if not namespace.container.exists(f"{namespace._path}/{name}"):
         data = dataset.load()
         extension = dataset._describe().get("extension")
@@ -400,12 +400,12 @@ def log_dataset_metadata(namespace: Handler, name: str, dataset: AbstractDataset
 def log_data_catalog_metadata(namespace: Handler, catalog: DataCatalog):
     namespace = namespace["catalog"]
 
-    for name, dataset in catalog._data_sets.items():
+    for name, dataset in catalog._datasets.items():
         if dataset.exists() and not namespace.container.exists(join_paths(namespace._path, name)):
-            if not isinstance(dataset, MemoryDataSet) and not isinstance(dataset, NeptuneRunDataSet):
+            if not isinstance(dataset, MemoryDataset) and not isinstance(dataset, NeptuneRunDataset):
                 log_dataset_metadata(namespace=namespace["datasets"], name=name, dataset=dataset)
 
-            if isinstance(dataset, NeptuneFileDataSet):
+            if isinstance(dataset, NeptuneFileDataset):
                 log_file_dataset(namespace=namespace["files"], name=name, dataset=dataset)
 
     log_parameters(namespace=namespace, catalog=catalog)
@@ -441,7 +441,7 @@ def after_catalog_created(self, catalog: DataCatalog) -> None:
         if config.enabled:
             os.environ["NEPTUNE_CUSTOM_RUN_ID"] = self._run_id
 
-        catalog.add(data_set_name="neptune_run", data_set=NeptuneRunDataSet())
+        catalog.add(dataset_name="neptune_run", dataset=NeptuneRunDataset())
 
     @hook_impl
     def before_pipeline_run(self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog) -> None:

diff --git a/tests/kedro_neptune/utils/kedro_utils.py b/tests/kedro_neptune/utils/kedro_utils.py
@@ -15,13 +15,15 @@
 #
 __all__ = ["run_pipeline"]
 
+from pathlib import Path
 from typing import (
     Any,
     Dict,
 )
 
 from kedro.framework.project import configure_project
 from kedro.framework.session import KedroSession
+from kedro.framework.startup import bootstrap_project
 
 
 def run_pipeline(project: str, run_params: Dict[str, Any] = None, session_params: Dict[str, Any] = None):
@@ -30,8 +32,9 @@ def run_pipeline(project: str, run_params: Dict[str, Any] = None, session_params
 
     if session_params is None:
         session_params = {}
-
     configure_project(project)
 
-    with KedroSession.create(project, **session_params) as session:
+    bootstrap_project(Path(".").resolve())
+
+    with KedroSession.create(**session_params) as session:
         session.run(**run_params)
diff --git a/tests/kedro_neptune/utils/run_utils.py b/tests/kedro_neptune/utils/run_utils.py
@@ -70,7 +70,7 @@ def assert_structure(travel_speed: int = 10000):
             "filepath": f"{os.getcwd()}/data/planets/planets.csv",
             "name": "planets@neptune",
             "protocol": "file",
-            "type": "NeptuneFileDataSet",
+            "type": "NeptuneFileDataset",
             "version": "None",
         }
         assert run.exists("kedro/catalog/files/planets@neptune")