Neat 357 dms loader json type extension #537

Merged
merged 20 commits on Jul 12, 2024
2 changes: 1 addition & 1 deletion Makefile
@@ -1,5 +1,5 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up
version="0.85.11"
version="0.85.12"
run-explorer:
@echo "Running explorer API server..."
# open "http://localhost:8000/static/index.html" || true
2 changes: 1 addition & 1 deletion cognite/neat/_version.py
@@ -1 +1 @@
__version__ = "0.85.11"
__version__ = "0.85.12"
38 changes: 33 additions & 5 deletions cognite/neat/graph/loaders/_rdf2dms.py
@@ -2,7 +2,7 @@
from collections import defaultdict
from collections.abc import Iterable, Sequence
from pathlib import Path
from typing import Any
from typing import Any, get_args

import yaml
from cognite.client import CogniteClient
@@ -20,7 +20,7 @@
from cognite.neat.graph.stores import NeatGraphStore
from cognite.neat.issues import NeatIssue, NeatIssueList
from cognite.neat.rules.models import DMSRules
from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
from cognite.neat.utils.upload import UploadResult
from cognite.neat.utils.utils import create_sha256_hash

@@ -145,7 +145,9 @@ def _create_validation_classes(
issues = NeatIssueList[NeatIssue]()
field_definitions: dict[str, tuple[type, Any]] = {}
edge_by_property: dict[str, dm.EdgeConnection] = {}
validators: dict[str, classmethod] = {}
direct_relation_by_property: dict[str, dm.DirectRelation] = {}
json_fields: list[str] = []
for prop_name, prop in view.properties.items():
if isinstance(prop, dm.EdgeConnection):
edge_by_property[prop_name] = prop
@@ -163,6 +165,9 @@
)
)
continue

if data_type == Json:
json_fields.append(prop_name)
python_type = data_type.python
if prop.type.is_list:
python_type = list[python_type]
@@ -175,13 +180,29 @@
field_definitions[prop_name] = (python_type, default_value)

def parse_list(cls, value: Any, info: ValidationInfo) -> list[str]:
if isinstance(value, list) and cls.model_fields[info.field_name].annotation is not list:
if isinstance(value, list) and list.__name__ not in _get_field_value_types(cls, info):
if len(value) == 1:
return value[0]
raise ValueError(f"Got multiple values for {info.field_name}: {value}")

return value

validators: dict[str, classmethod] = {"parse_list": field_validator("*", mode="before")(parse_list)} # type: ignore[dict-item,arg-type]
def parse_json_string(cls, value: Any, info: ValidationInfo) -> dict:
if isinstance(value, dict):
return value
elif isinstance(value, str):
try:
return json.loads(value)
except json.JSONDecodeError as error:
raise ValueError(f"Not valid JSON string for {info.field_name}: {value}, error {error}") from error
else:
raise ValueError(f"Expect valid JSON string or dict for {info.field_name}: {value}")

if json_fields:
validators["parse_json_string"] = field_validator(*json_fields, mode="before")(parse_json_string) # type: ignore[assignment, arg-type]

validators["parse_list"] = field_validator("*", mode="before")(parse_list) # type: ignore[assignment, arg-type]

if direct_relation_by_property:

def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
@@ -280,7 +301,10 @@ def _upload_to_cdf(
result.created.update(item.as_id() for item in e.successful)
yield result
else:
for instance_type, instances in {"Nodes": upserted.nodes, "Edges": upserted.edges}.items():
for instance_type, instances in {
"Nodes": upserted.nodes,
"Edges": upserted.edges,
}.items():
result = UploadResult[InstanceId](name=instance_type, issues=read_issues)
for instance in instances: # type: ignore[attr-defined]
if instance.was_modified and instance.created_time == instance.last_updated_time:
@@ -292,6 +316,10 @@
yield result


def _get_field_value_types(cls, info):
return [type_.__name__ for type_ in get_args(cls.model_fields[info.field_name].annotation)]


def _triples2dictionary(
triples: Iterable[tuple[str, str, str]],
) -> dict[str, dict[str, list[str]]]:
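The core of this change is a pydantic `field_validator` in `mode="before"` that coerces JSON-encoded strings into `dict` values before regular field validation runs, registered via `field_validator(*json_fields, mode="before")` for every field inferred as `Json`. A minimal, self-contained sketch of the same pattern; the `AssetInstance` model and its fields are hypothetical, not part of this PR:

```python
import json
from typing import Any

from pydantic import BaseModel, ValidationInfo, field_validator


class AssetInstance(BaseModel):
    # Hypothetical model: "metadata" stands in for any field typed as Json.
    name: str
    metadata: dict

    @field_validator("metadata", mode="before")
    @classmethod
    def parse_json_string(cls, value: Any, info: ValidationInfo) -> dict:
        # Already-parsed dictionaries pass through unchanged.
        if isinstance(value, dict):
            return value
        # JSON-encoded strings are decoded, mirroring the loader's validator.
        if isinstance(value, str):
            try:
                return json.loads(value)
            except json.JSONDecodeError as error:
                raise ValueError(
                    f"Not valid JSON string for {info.field_name}: {value}, error {error}"
                ) from error
        raise ValueError(f"Expect valid JSON string or dict for {info.field_name}: {value}")


instance = AssetInstance(name="pump-01", metadata='{"vendor": "ACME"}')
assert instance.metadata == {"vendor": "ACME"}
```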
5 changes: 4 additions & 1 deletion cognite/neat/rules/importers/_inference2rules.py
@@ -75,7 +75,9 @@ def __init__(
self.graph = graph
self.max_number_of_instance = max_number_of_instance
self.prefix = prefix
self.check_for_json_string = check_for_json_string
self.check_for_json_string = (
check_for_json_string if graph.store.__class__.__name__ != "OxigraphStore" else False
)

@classmethod
def from_graph_store(
@@ -235,6 +237,7 @@ def _to_rules_components(
query.replace("instance_id", instance)
): # type: ignore[misc]
property_id = remove_namespace_from_uri(property_uri)

self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
value_type_uri = data_type_uri if data_type_uri else object_type_uri

8 changes: 4 additions & 4 deletions cognite/neat/rules/models/data_types.py
@@ -237,7 +237,7 @@ class Literal(DataType):

class Timeseries(DataType):
name = "timeseries"
python = dms.TimeSeriesReference
python = str
dms = dms.TimeSeriesReference
graphql = "TimeSeries"
xsd = "string"
@@ -246,7 +246,7 @@ class Timeseries(DataType):

class File(DataType):
name = "file"
python = dms.FileReference
python = str
dms = dms.FileReference
graphql = "File"
xsd = "string"
@@ -255,7 +255,7 @@ class File(DataType):

class Sequence(DataType):
name = "sequence"
python = dms.SequenceReference
python = str
dms = dms.SequenceReference
graphql = "Sequence"
xsd = "string"
@@ -264,7 +264,7 @@ class Sequence(DataType):

class Json(DataType):
name = "json"
python = dms.Json
python = dict
dms = dms.Json
graphql = "Json"
xsd = "json"
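The `python` attribute is used as the annotation when the loader builds its validation models dynamically (see `field_definitions[prop_name] = (python_type, default_value)` in the `_rdf2dms.py` diff above), so it must be a plain Python type; `dms.TimeSeriesReference`, `dms.FileReference`, `dms.SequenceReference`, and `dms.Json` appear to be DMS property-type classes rather than Python value types. A rough sketch of how the corrected mapping plays out with `pydantic.create_model`; the model name and fields are illustrative, not the loader's actual call site:

```python
from pydantic import create_model

# Illustrative field definitions mirroring the corrected DataType.python
# mapping: reference-like DMS types validate as `str` (an external id),
# and Json validates as `dict`.
field_definitions = {
    "timeseries": (str, None),  # previously dms.TimeSeriesReference
    "metadata": (dict, None),   # previously dms.Json
}

InstanceModel = create_model("InstanceModel", **field_definitions)

instance = InstanceModel(timeseries="ts-external-id", metadata={"vendor": "ACME"})
assert instance.metadata == {"vendor": "ACME"}
```

With the old mapping, the annotation handed to the model would have been the reference class itself, which pydantic cannot validate ordinary string or dict payloads against.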
7 changes: 7 additions & 0 deletions docs/CHANGELOG.md
@@ -15,6 +15,13 @@ Changes are grouped as follows:
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.85.12] - 11-07-24
### Added
- Handling of `Json` fields in the DMS loader

### Fixed
- DMS-related data types whose `.python` attribute was wrongly mapped to SDK reference classes instead of plain Python types

## [0.85.11] - 10-07-24
### Added
- Method `create_reference` to `DMSRules` to add reference dms rules and do the mapping of properties
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-neat"
version = "0.85.11"
version = "0.85.12"
readme = "README.md"
description = "Knowledge graph transformation"
authors = [
23 changes: 23 additions & 0 deletions tests/tests_unit/graph/test_loaders/test_dms_loader.py
@@ -0,0 +1,23 @@
from cognite.neat.graph.extractors import AssetsExtractor
from cognite.neat.graph.loaders import DMSLoader
from cognite.neat.graph.stores import NeatGraphStore
from cognite.neat.rules.importers import InferenceImporter
from tests.config import CLASSIC_CDF_EXTRACTOR_DATA


def test_metadata_as_json_field():
store = NeatGraphStore.from_memory_store()
store.write(AssetsExtractor.from_file(CLASSIC_CDF_EXTRACTOR_DATA / "assets.yaml", unpack_metadata=False))

importer = InferenceImporter.from_graph_store(store, check_for_json_string=True, prefix="some-prefix")

rules, _ = importer.to_rules()
store.add_rules(rules)

dms_rules = rules.as_dms_architect_rules()

loader = DMSLoader.from_rules(dms_rules, store, dms_rules.metadata.space)
instances = {instance.external_id: instance for instance in loader._load()}

# metadata is not unpacked but kept as a JSON object (dict)
assert isinstance(instances["Asset_4288662884680989"].sources[0].properties["metadata"], dict)