Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NEAT-338] 🤓 Notebook representation of each module (and missing documentation) #521

Merged
merged 12 commits into from
Jun 27, 2024
18 changes: 17 additions & 1 deletion cognite/neat/_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,24 @@ def dump(self, aggregate: bool = True) -> dict[str, Any]:
"""Return a dictionary representation of the object."""
raise NotImplementedError()

def _repr_html_(self) -> str:
return pd.Series(self.dump(aggregate=True)).to_frame(name="value")._repr_html_()


@dataclass(frozen=True)
class FrozenNeatObject:
    """Immutable (frozen dataclass) base for objects that can be dumped to a dictionary.

    Subclasses are expected to provide ``dump``; the HTML representation is
    derived from it.
    """

    # NOTE(review): ``@abstractmethod`` is only enforced when the class uses
    # ``ABCMeta``; as written here, the enforcement is the explicit
    # ``NotImplementedError`` raised below.
    @abstractmethod
    def dump(self, aggregate: bool = True) -> dict[str, Any]:
        """Return a dictionary representation of the object.

        Args:
            aggregate: Flag forwarded by callers; its meaning is defined by
                the overriding implementation.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def _repr_html_(self) -> str:
        """Return the aggregated dump rendered as a one-column HTML table.

        The dictionary keys become the row labels and the single column is
        named ``value``.
        """
        dumped = self.dump(aggregate=True)
        frame = pd.Series(dumped).to_frame(name="value")
        return frame._repr_html_()


T_NeatObject = TypeVar("T_NeatObject", bound=NeatObject)
T_NeatObject = TypeVar("T_NeatObject", bound=NeatObject | FrozenNeatObject)


class NeatList(list, Sequence[T_NeatObject]):
Expand Down
22 changes: 22 additions & 0 deletions cognite/neat/graph/extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from ._base import BaseExtractor
from ._classic_cdf._assets import AssetsExtractor
from ._classic_cdf._events import EventsExtractor
from ._classic_cdf._files import FilesExtractor
Expand All @@ -10,6 +11,7 @@
from ._rdf_file import RdfFileExtractor

__all__ = [
"BaseExtractor",
"AssetsExtractor",
"MockGraphGenerator",
"RelationshipsExtractor",
Expand All @@ -35,3 +37,23 @@
| RdfFileExtractor
| DexpiExtractor
)


def _repr_html_() -> str:
    """Return an HTML overview of the extractors exported by this module.

    Every name in ``__all__`` except ``BaseExtractor`` becomes one table row
    holding the name and the first line of the object's docstring, or
    ``"Missing"`` when the object has no docstring.
    """
    import pandas as pd

    module_namespace = globals()
    rows = []
    for name in __all__:
        if name == "BaseExtractor":
            continue
        doc = module_namespace[name].__doc__
        summary = doc.strip().split("\n")[0] if doc else "Missing"
        rows.append({"Extractor": name, "Description": summary})

    table = pd.DataFrame(rows)._repr_html_()  # type: ignore[operator]

    intro = (
        "<strong>Extractor</strong> An extractor is used to read data from "
        "a source into Neat's internal triple storage. <br />"
    )
    return f"{intro}{table}"
5 changes: 5 additions & 0 deletions cognite/neat/graph/extractors/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections.abc import Iterable

from cognite.neat.graph.models import Triple
from cognite.neat.utils.auxiliary import class_html_doc


class BaseExtractor:
Expand All @@ -12,3 +13,7 @@ class BaseExtractor:
@abstractmethod
def extract(self) -> Iterable[Triple]:
raise NotImplementedError()

@classmethod
def _repr_html_(cls) -> str:
    """Return the HTML documentation produced by ``class_html_doc`` for this class."""
    html_doc = class_html_doc(cls)
    return html_doc
7 changes: 7 additions & 0 deletions cognite/neat/graph/extractors/_classic_cdf/_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@


class AssetsExtractor(BaseExtractor):
"""Extract data from Cognite Data Fusion's Assets into Neat.

Args:
assets (Iterable[Asset]): An iterable of assets.
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
"""

def __init__(
self,
assets: Iterable[Asset],
Expand Down
7 changes: 7 additions & 0 deletions cognite/neat/graph/extractors/_classic_cdf/_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@


class EventsExtractor(BaseExtractor):
"""Extract data from Cognite Data Fusion's Events into Neat.

Args:
events (Iterable[Event]): An iterable of events.
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
"""

def __init__(
self,
events: Iterable[Event],
Expand Down
7 changes: 7 additions & 0 deletions cognite/neat/graph/extractors/_classic_cdf/_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@


class LabelsExtractor(BaseExtractor):
"""Extract data from Cognite Data Fusion's Labels into Neat.

Args:
labels (Iterable[LabelDefinition]): An iterable of labels.
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
"""

def __init__(
self,
labels: Iterable[LabelDefinition],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@


class RelationshipsExtractor(BaseExtractor):
"""Extract data from Cognite Data Fusion's Relationships into Neat.

Args:
relationships (Iterable[Relationship]): An iterable of relationships.
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
"""

def __init__(
self,
relationships: Iterable[Relationship],
Expand Down
7 changes: 7 additions & 0 deletions cognite/neat/graph/extractors/_classic_cdf/_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@


class SequencesExtractor(BaseExtractor):
"""Extract data from Cognite Data Fusion's Sequences into Neat.

Args:
sequence (Iterable[Sequence]): An iterable of sequences.
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
"""

def __init__(
self,
sequence: Iterable[Sequence],
Expand Down
7 changes: 7 additions & 0 deletions cognite/neat/graph/extractors/_classic_cdf/_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@


class TimeSeriesExtractor(BaseExtractor):
"""Extract data from Cognite Data Fusion's TimeSeries into Neat.

Args:
timeseries (Iterable[TimeSeries]): An iterable of timeseries.
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
"""

def __init__(
self,
timeseries: Iterable[TimeSeries],
Expand Down
8 changes: 8 additions & 0 deletions cognite/neat/graph/extractors/_rdf_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@


class RdfFileExtractor(BaseExtractor):
"""Extract data from RDF files into Neat.

Args:
filepath (Path): The path to the RDF file.
mime_type (MIMETypes, optional): The MIME type of the RDF file. Defaults to "application/rdf+xml".
base_uri (URIRef, optional): The base URI to use. Defaults to None.
"""

def __init__(
self,
filepath: Path,
Expand Down
19 changes: 19 additions & 0 deletions cognite/neat/graph/loaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,22 @@
from ._rdf2dms import DMSLoader

__all__ = ["BaseLoader", "CDFLoader", "DMSLoader"]


def _repr_html_() -> str:
    """Return an HTML overview of the loaders exported by this module.

    Every name in ``__all__`` except ``BaseLoader`` and ``CDFLoader`` becomes
    one table row holding the name and the first line of the object's
    docstring, or ``"Missing"`` when the object has no docstring.
    """
    import pandas as pd

    hidden = ("BaseLoader", "CDFLoader")
    module_namespace = globals()
    rows = []
    for name in __all__:
        if name in hidden:
            continue
        doc = module_namespace[name].__doc__
        summary = doc.strip().split("\n")[0] if doc else "Missing"
        rows.append({"Loader": name, "Description": summary})

    table = pd.DataFrame(rows)._repr_html_()  # type: ignore[operator]

    intro = "<strong>Loader</strong> A loader writes data from Neat's triple storage into a target system"
    return f"{intro}<br />{table}"
5 changes: 5 additions & 0 deletions cognite/neat/graph/loaders/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from cognite.neat.graph import NeatGraphStore
from cognite.neat.graph.issues.loader import FailedAuthorizationError
from cognite.neat.issues import NeatIssue, NeatIssueList
from cognite.neat.utils.auxiliary import class_html_doc
from cognite.neat.utils.upload import UploadDiffsID, UploadResultIDs

T_Output = TypeVar("T_Output")
Expand All @@ -34,6 +35,10 @@ def _load(self, stop_on_exception: bool = False) -> Iterable[T_Output | NeatIssu
"""Load the graph with data."""
pass

@classmethod
def _repr_html_(cls) -> str:
    """Return the HTML documentation produced by ``class_html_doc`` for this class."""
    html_doc = class_html_doc(cls)
    return html_doc


class CDFLoader(BaseLoader[T_Output]):
_UPLOAD_BATCH_SIZE: ClassVar[int] = 1000
Expand Down
11 changes: 11 additions & 0 deletions cognite/neat/graph/loaders/_rdf2dms.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@


class DMSLoader(CDFLoader[dm.InstanceApply]):
"""Load data from Neat into Cognite Data Fusion's Data Modeling Service (DMS).

Args:
graph_store (NeatGraphStore): The graph store to load the data from.
data_model (dm.DataModel[dm.View] | None): The data model to load.
instance_space (str): The instance space to load the data into.
class_by_view_id (dict[ViewId, str] | None): A mapping from view id to class name. Defaults to None.
create_issues (Sequence[NeatIssue] | None): A list of issues that occurred during reading. Defaults to None.
tracker (type[Tracker] | None): The tracker to use. Defaults to None.
"""

def __init__(
self,
graph_store: NeatGraphStore,
Expand Down
10 changes: 10 additions & 0 deletions cognite/neat/graph/stores/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,13 @@ def transform(self, transformer: Transformers) -> None:
description=transformer.description,
)
)

def _repr_html_(self) -> str:
    """Return an HTML summary of the graph store followed by its provenance.

    The output starts with the concrete class name and a short description of
    what a graph store is, then a provenance heading, then the HTML returned
    by ``self.provenance._repr_html_()``.
    """
    # Render the provenance first, matching the original evaluation order.
    provenance_html = self.provenance._repr_html_()
    store_name = type(self).__name__

    sections = [
        f"<strong>{store_name}</strong> A graph store is a container for storing triples. ",
        "It can be queried and transformed to extract information.<br />",
        "<strong>Provenance</strong> Provenance is a record of changes that have occurred in the graph store.<br />",
        f"{provenance_html}",
    ]
    return "".join(sections)
14 changes: 11 additions & 3 deletions cognite/neat/graph/stores/_provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@


import uuid
from collections import UserList
from collections.abc import Sequence
from dataclasses import dataclass
from datetime import datetime
from typing import TypeVar

from rdflib import PROV, RDF, Literal, URIRef

from cognite.neat._shared import FrozenNeatObject, NeatList
from cognite.neat.constants import DEFAULT_NAMESPACE


Expand Down Expand Up @@ -64,7 +64,7 @@ def as_triples(self):


@dataclass(frozen=True)
class Change:
class Change(FrozenNeatObject):
agent: Agent
activity: Activity
entity: Entity
Expand All @@ -81,11 +81,19 @@ def record(cls, activity: str, start: datetime, end: datetime, description: str)
entity = Entity(was_generated_by=activity, was_attributed_to=agent)
return cls(agent, activity, entity, description)

def dump(self, aggregate: bool = True) -> dict[str, str]:
    """Return the change as a flat dictionary of identifiers and description.

    Args:
        aggregate: Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        A dictionary with the keys ``Agent``, ``Activity``, ``Entity`` (the
        ``id_`` of the respective attribute) and ``Description``.
    """
    labels = ("Agent", "Activity", "Entity", "Description")
    values = (
        self.agent.id_,
        self.activity.id_,
        self.entity.id_,
        self.description,
    )
    return dict(zip(labels, values))


T_Change = TypeVar("T_Change", bound=Change)


class Provenance(UserList[T_Change]):
class Provenance(NeatList[Change]):
def __init__(self, changes: Sequence[T_Change] | None = None):
super().__init__(changes or [])

Expand Down
24 changes: 23 additions & 1 deletion cognite/neat/rules/exporters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from ._rules2dms import CDFExporter, DMSExporter
from ._base import BaseExporter, CDFExporter
from ._rules2dms import DMSExporter
from ._rules2excel import ExcelExporter
from ._rules2ontology import GraphExporter, OWLExporter, SemanticDataModelExporter, SHACLExporter
from ._rules2yaml import YAMLExporter

__all__ = [
"BaseExporter",
"DMSExporter",
"CDFExporter",
"SemanticDataModelExporter",
Expand All @@ -13,3 +15,23 @@
"ExcelExporter",
"YAMLExporter",
]


def _repr_html_() -> str:
    """Return an HTML overview of the exporters exported by this module.

    Every name in ``__all__`` except ``BaseExporter``, ``CDFExporter`` and
    ``GraphExporter`` becomes one table row holding the name and the first
    line of the object's docstring, or ``"Missing"`` when the object has no
    docstring.
    """
    import pandas as pd

    hidden = ("BaseExporter", "CDFExporter", "GraphExporter")
    module_namespace = globals()
    rows = []
    for name in __all__:
        if name in hidden:
            continue
        doc = module_namespace[name].__doc__
        summary = doc.strip().split("\n")[0] if doc else "Missing"
        rows.append({"Exporter": name, "Description": summary})

    table = pd.DataFrame(rows)._repr_html_()  # type: ignore[operator]

    intro = (
        "<strong>Exporter</strong> An exporter converts Neat's representation of a data model called <em>Rules</em>"
        " into a schema/data model for a target format.<br />"
    )
    return f"{intro}{table}"
5 changes: 5 additions & 0 deletions cognite/neat/rules/exporters/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from cognite.neat.rules._shared import Rules
from cognite.neat.rules.models import DMSRules, InformationRules, RoleTypes
from cognite.neat.utils.auxiliary import class_html_doc
from cognite.neat.utils.upload import UploadResult, UploadResultList

T_Export = TypeVar("T_Export")
Expand Down Expand Up @@ -34,6 +35,10 @@ def _convert_to_output_role(self, rules: Rules, output_role: RoleTypes | None =
else:
raise NotImplementedError(f"Role {output_role} is not supported for {type(rules).__name__} rules")

@classmethod
def _repr_html_(cls) -> str:
    """Return the HTML documentation from ``class_html_doc``, with factory methods excluded."""
    html_doc = class_html_doc(cls, include_factory_methods=False)
    return html_doc


class CDFExporter(BaseExporter[T_Export]):
@abstractmethod
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/rules/exporters/_rules2dms.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@


class DMSExporter(CDFExporter[DMSSchema]):
"""Class for exporting rules object to CDF Data Model Storage (DMS).
"""Export rules to Cognite Data Fusion's Data Model Storage (DMS) service.

Args:
export_components (frozenset[Literal["all", "spaces", "data_models", "views", "containers"]], optional):
Expand Down
6 changes: 6 additions & 0 deletions cognite/neat/rules/exporters/_rules2ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,22 @@ def export_to_file(self, rules: Rules, filepath: Path) -> None:


class OWLExporter(GraphExporter):
    """Exports rules to an OWL ontology."""

    def export(self, rules: Rules) -> Graph:
        """Build an ``Ontology`` from *rules* and return the result of ``as_owl()``."""
        ontology = Ontology.from_rules(rules)
        return ontology.as_owl()


class SHACLExporter(GraphExporter):
    """Exports rules to a SHACL graph."""

    def export(self, rules: Rules) -> Graph:
        """Build an ``Ontology`` from *rules* and return the result of ``as_shacl()``."""
        ontology = Ontology.from_rules(rules)
        return ontology.as_shacl()


class SemanticDataModelExporter(GraphExporter):
    """Exports rules to a semantic data model."""

    def export(self, rules: Rules) -> Graph:
        """Build an ``Ontology`` from *rules* and return the result of ``as_semantic_data_model()``."""
        ontology = Ontology.from_rules(rules)
        return ontology.as_semantic_data_model()

Expand Down
Loading
Loading