Skip to content

Commit

Permalink
Merge branch 'main' into ss/add-dependencies-support
Browse files Browse the repository at this point in the history
  • Loading branch information
SiddhantSadangi authored Jan 26, 2024
2 parents daeb146 + 04a3f03 commit ac1c7a6
Show file tree
Hide file tree
Showing 9 changed files with 44 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
max-parallel: 4
matrix:
os: [ubuntu-latest, macos-latest]
python-version: [3.8, 3.9, "3.10", "3.11"]
python-version: [3.9, "3.10", "3.11"]
steps:
- uses: actions/checkout@v3

Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
### Features
- Added [`dependencies`](https://docs.neptune.ai/logging/dependencies/) support ([#80](https://github.com/neptune-ai/kedro-neptune/pull/80))

### Fixes
- Replaced `*DataSet` and `data_set` with `*Dataset` and `dataset` respectively ([#79](https://github.com/neptune-ai/kedro-neptune/pull/79))

## 0.3.0

### Features
Expand Down
14 changes: 7 additions & 7 deletions examples/planets/conf/base/catalog_neptune.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# You can log files to Neptune via NeptuneFileDataSet
# You can log files to Neptune via NeptuneFileDataset
#
# example_artifact:
# type: kedro_neptune.NeptuneFileDataSet
# type: kedro_neptune.NeptuneFileDataset
# filepath: data/06_models/clf_model.pkl
#
# If you want to log existing Kedro Dataset to Neptune add @neptune to the DataSet name
# If you want to log an existing Kedro Dataset to Neptune, add @neptune to the Dataset name
#
# example_iris_data@neptune:
# type: kedro_neptune.NeptuneFileDataSet
# type: kedro_neptune.NeptuneFileDataset
# filepath: data/01_raw/iris.csv
#
# You can use kedro_neptune.NeptuneFileDataSet in any catalog including conf/base/catalog.yml
# You can use kedro_neptune.NeptuneFileDataset in any catalog including conf/base/catalog.yml
#
planets@neptune:
type: kedro_neptune.NeptuneFileDataSet
type: kedro_neptune.NeptuneFileDataset
filepath: data/planets/planets.csv

logo:
type: kedro_neptune.NeptuneFileDataSet
type: kedro_neptune.NeptuneFileDataset
filepath: data/planets/logo.png
2 changes: 1 addition & 1 deletion examples/planets/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[tool.kedro]
package_name = "planets"
project_name = "planets"
kedro_init_version = "0.18.5"
kedro_init_version = "0.19.1"
4 changes: 2 additions & 2 deletions examples/planets/src/planets/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
"config_patterns": {
# "spark" : ["spark*/"],
# "parameters": ["parameters*", "parameters*/**", "**/parameters*"],
"credentials_neptune": ["credentials_neptune*"],
"neptune": ["neptune*"],
"credentials_neptune": ["*/credentials_neptune*"],
"neptune": ["*/neptune*"],
}
}

Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ style = "semver"
pattern = "default-unprefixed"

[tool.poetry.dependencies]
python = "^3.7"
python = "^3.9"

# Backports of functionality missing from older Python versions
importlib-metadata = { version = "*", python = "<3.8" }

# Base requirements
kedro = ">=0.18.5"
kedro-datasets = ">=1.8.0"
kedro = ">=0.19.0"
kedro-datasets = ">=2.0.0"
"ruamel.yaml" = "^0.17.0"

# dev
Expand Down
42 changes: 21 additions & 21 deletions src/kedro_neptune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#

__all__ = ["NeptuneRunDataSet", "NeptuneFileDataSet", "neptune_hooks", "init", "__version__"]
__all__ = ["NeptuneRunDataset", "NeptuneFileDataset", "neptune_hooks", "init", "__version__"]

import hashlib
import json
Expand All @@ -35,7 +35,7 @@
from kedro.framework.startup import ProjectMetadata
from kedro.io import (
DataCatalog,
MemoryDataSet,
MemoryDataset,
)
from kedro.io.core import (
AbstractDataset,
Expand Down Expand Up @@ -98,19 +98,19 @@ def neptune_commands():
"""

INITIAL_NEPTUNE_CATALOG = """\
# You can log files to Neptune via NeptuneFileDataSet
# You can log files to Neptune via NeptuneFileDataset
#
# example_artifact:
# type: kedro_neptune.NeptuneFileDataSet
# type: kedro_neptune.NeptuneFileDataset
# filepath: data/06_models/clf_model.pkl
#
# If you want to log existing Kedro Dataset to Neptune add @neptune to the DataSet name
# If you want to log existing Kedro Dataset to Neptune add @neptune to the Dataset name
#
# example_iris_data@neptune:
# type: kedro_neptune.NeptuneFileDataSet
# type: kedro_neptune.NeptuneFileDataset
# filepath: data/01_raw/iris.csv
#
# You can use kedro_neptune.NeptuneFileDataSet in any catalog including conf/base/catalog.yml
# You can use kedro_neptune.NeptuneFileDataset in any catalog including conf/base/catalog.yml
#
"""

Expand Down Expand Up @@ -148,7 +148,7 @@ def init(
After initializing it, whenever you run '$ kedro run', you will log:
* parameters
* pipeline execution configuration (run_params)
* metadata about Kedro DataSets
* metadata about Kedro Datasets
* hardware consumption and node execution times
* configuration files from the conf/base directory
* full Kedro run command
Expand Down Expand Up @@ -226,7 +226,7 @@ def _connection_mode(enabled: bool) -> str:
return "async" if enabled else "debug"


class NeptuneRunDataSet(AbstractDataset):
class NeptuneRunDataset(AbstractDataset):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._run: Optional[neptune.Run] = None
Expand Down Expand Up @@ -287,7 +287,7 @@ def _release(self) -> None:
self._loaded = False


class BinaryFileDataSet(TextDataset):
class BinaryFileDataset(TextDataset):
def __init__(
self,
filepath: str,
Expand Down Expand Up @@ -318,11 +318,11 @@ def _load(self) -> bytes:
return fs_file.read()


class NeptuneFileDataSet(BinaryFileDataSet):
"""NeptuneFileDataSet is a Kedro DataSet that lets you log files to Neptune.
class NeptuneFileDataset(BinaryFileDataset):
"""NeptuneFileDataset is a Kedro Dataset that lets you log files to Neptune.
It can be any file on the POSIX compatible filesystem.
To log it, you need to define the NeptuneFileDataSet in any Kedro catalog, including catalog.yml.
To log it, you need to define the NeptuneFileDataset in any Kedro catalog, including catalog.yml.
Args:
filepath: Filepath in POSIX format to a text file prefixed with a protocol like s3://.
Expand All @@ -336,17 +336,17 @@ class NeptuneFileDataSet(BinaryFileDataSet):
Log a file to Neptune from any Kedro catalog YML file:
example_model_file:
type: kedro_neptune.NeptuneFileDataSet
type: kedro_neptune.NeptuneFileDataset
filepath: data/06_models/clf.pkl
Log a file to Neptune that has already been defined as a Kedro DataSet in any catalog YML file:
Log a file to Neptune that has already been defined as a Kedro Dataset in any catalog YML file:
example_iris_data:
type: pandas.CSVDataset
filepath: data/01_raw/iris.csv
example_iris_data@neptune:
type: kedro_neptune.NeptuneFileDataSet
type: kedro_neptune.NeptuneFileDataset
filepath: data/01_raw/iris.csv
For details, see the documentation:
Expand All @@ -363,7 +363,7 @@ def __init__(
super().__init__(filepath=filepath, version=None, credentials=credentials, fs_args=fs_args)


def log_file_dataset(namespace: Handler, name: str, dataset: NeptuneFileDataSet):
def log_file_dataset(namespace: Handler, name: str, dataset: NeptuneFileDataset):
if not namespace.container.exists(f"{namespace._path}/{name}"):
data = dataset.load()
extension = dataset._describe().get("extension")
Expand Down Expand Up @@ -400,12 +400,12 @@ def log_dataset_metadata(namespace: Handler, name: str, dataset: AbstractDataset
def log_data_catalog_metadata(namespace: Handler, catalog: DataCatalog):
namespace = namespace["catalog"]

for name, dataset in catalog._data_sets.items():
for name, dataset in catalog._datasets.items():
if dataset.exists() and not namespace.container.exists(join_paths(namespace._path, name)):
if not isinstance(dataset, MemoryDataSet) and not isinstance(dataset, NeptuneRunDataSet):
if not isinstance(dataset, MemoryDataset) and not isinstance(dataset, NeptuneRunDataset):
log_dataset_metadata(namespace=namespace["datasets"], name=name, dataset=dataset)

if isinstance(dataset, NeptuneFileDataSet):
if isinstance(dataset, NeptuneFileDataset):
log_file_dataset(namespace=namespace["files"], name=name, dataset=dataset)

log_parameters(namespace=namespace, catalog=catalog)
Expand Down Expand Up @@ -441,7 +441,7 @@ def after_catalog_created(self, catalog: DataCatalog) -> None:
if config.enabled:
os.environ["NEPTUNE_CUSTOM_RUN_ID"] = self._run_id

catalog.add(data_set_name="neptune_run", data_set=NeptuneRunDataSet())
catalog.add(dataset_name="neptune_run", dataset=NeptuneRunDataset())

@hook_impl
def before_pipeline_run(self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog) -> None:
Expand Down
7 changes: 5 additions & 2 deletions tests/kedro_neptune/utils/kedro_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
#
__all__ = ["run_pipeline"]

from pathlib import Path
from typing import (
Any,
Dict,
)

from kedro.framework.project import configure_project
from kedro.framework.session import KedroSession
from kedro.framework.startup import bootstrap_project


def run_pipeline(project: str, run_params: Dict[str, Any] = None, session_params: Dict[str, Any] = None):
Expand All @@ -30,8 +32,9 @@ def run_pipeline(project: str, run_params: Dict[str, Any] = None, session_params

if session_params is None:
session_params = {}

configure_project(project)

with KedroSession.create(project, **session_params) as session:
bootstrap_project(Path(".").resolve())

with KedroSession.create(**session_params) as session:
session.run(**run_params)
2 changes: 1 addition & 1 deletion tests/kedro_neptune/utils/run_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def assert_structure(travel_speed: int = 10000):
"filepath": f"{os.getcwd()}/data/planets/planets.csv",
"name": "planets@neptune",
"protocol": "file",
"type": "NeptuneFileDataSet",
"type": "NeptuneFileDataset",
"version": "None",
}
assert run.exists("kedro/catalog/files/planets@neptune")
Expand Down

0 comments on commit ac1c7a6

Please sign in to comment.