Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neat 341 inference importer issues with namespace prefix collision #530

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up
version="0.85.4"
version="0.85.5"
run-explorer:
@echo "Running explorer API server..."
# open "http://localhost:8000/static/index.html" || true
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.85.4"
__version__ = "0.85.5"
8 changes: 0 additions & 8 deletions cognite/neat/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,6 @@
"owl": OWL._NS,
"xsd": XSD._NS,
"pav": Namespace("http://purl.org/pav/"),
"cim": Namespace("http://iec.ch/TC57/2013/CIM-schema-cim16#"),
"icim": Namespace("http://iec.ch/TC57/2013/CIM-schema-cim16-info#"),
"entsoe": Namespace("http://entsoe.eu/CIM/SchemaExtension/3/1#"),
"entsoe2": Namespace("http://entsoe.eu/CIM/SchemaExtension/3/2#"),
"md": Namespace("http://iec.ch/TC57/61970-552/ModelDescription/1#"),
"pti": Namespace("http://www.pti-us.com/PTI_CIM-schema-cim16#"),
"tnt": Namespace("http://purl.org/cognite/tnt#"),
"neat": DEFAULT_NAMESPACE,
Comment on lines -25 to -32
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪦

}


Expand Down
16 changes: 12 additions & 4 deletions cognite/neat/graph/loaders/_rdf2dms.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStore, instance_space
except Exception as e:
issues.append(
loader_issues.FailedConvertError(
identifier=rules.metadata.as_identifier(), target_format="read DMS model", reason=str(e)
identifier=rules.metadata.as_identifier(),
target_format="read DMS model",
reason=str(e),
)
)
return cls(graph_store, data_model, instance_space, {}, issues)
Expand Down Expand Up @@ -199,7 +201,11 @@ def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list
return pydantic_cls, edge_by_property, issues

def _create_node(
self, identifier: str, properties: dict[str, list[str]], pydantic_cls: type[Model], view_id: dm.ViewId
self,
identifier: str,
properties: dict[str, list[str]],
pydantic_cls: type[Model],
view_id: dm.ViewId,
) -> dm.InstanceApply:
created = pydantic_cls.model_validate(properties)

Expand Down Expand Up @@ -233,7 +239,7 @@ def _create_edges(
external_id = f"{identifier}.{prop}.{target}"
yield dm.EdgeApply(
space=self.instance_space,
external_id=external_id if len(external_id) < 256 else create_sha256_hash(external_id),
external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
type=edge.type,
start_node=dm.DirectRelationReference(self.instance_space, identifier),
end_node=dm.DirectRelationReference(self.instance_space, target),
Expand Down Expand Up @@ -290,5 +296,7 @@ def _triples2dictionary(
"""Converts list of triples to dictionary"""
values_by_property_by_identifier: dict[str, dict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
for id_, property_, value in triples:
values_by_property_by_identifier[id_][property_].append(value)
# Skip placeholder strings that only stand in for a missing value, so they
# are not stored as real property values. The comparison is done on the
# lowercased value, so every entry in the tuple must itself be lowercase
# (the previous "None" entry could never match).
if value.lower() not in ("", "none", "nan", "null"):
    values_by_property_by_identifier[id_][property_].append(value)
return values_by_property_by_identifier
3 changes: 3 additions & 0 deletions cognite/neat/legacy/graph/examples/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@
# https://github.com/pydata/xarray/blob/main/xarray/tutorial.py
# Currently there are simple paths to the examples which are then easily loaded in the notebooks
nordic44_knowledge_graph = Path(__file__).parent / "Knowledge-Graph-Nordic44.xml"
nordic44_knowledge_graph_dirty = (
Path(__file__).parent / "Knowledge-Graph-Nordic44-dirty.xml"
)
4 changes: 2 additions & 2 deletions cognite/neat/legacy/graph/extractors/_dexpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from rdflib import Namespace

from cognite.neat.constants import PREFIXES
from cognite.neat.constants import DEFAULT_NAMESPACE
from cognite.neat.graph.extractors._dexpi import DexpiExtractor
from cognite.neat.legacy.graph.models import Triple

Expand Down Expand Up @@ -31,7 +31,7 @@ def __init__(
base_namespace: str | None = None,
):
self.filepath = Path(filepath)
self.namespace = Namespace(base_namespace) if isinstance(base_namespace, str | Namespace) else PREFIXES["neat"]
self.namespace = Namespace(base_namespace) if isinstance(base_namespace, str | Namespace) else DEFAULT_NAMESPACE

def extract(self) -> set[Triple]:
"""
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def _class2asset_instance(
if "name" in remapped_class_instance and remapped_class_instance["name"] == "":
remapped_class_instance["name"] = empty_name_default
# To maintain a consistent shape across all assets of a specific type we are adding missing metadata
# keys as empty strings, this was request by Statnett
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤫

# keys as empty strings; this was requested by a customer.
# Generally this is bad practice, but it serves as a workaround for their bad data
if missing_metadata and add_missing_metadata:
msg = f"Adding missing metadata keys with values set to empty string for {class_}"
Expand Down
Binary file not shown.
Binary file modified cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion cognite/neat/legacy/rules/examples/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
power_grid_data_model = _EXAMPLES / "power-grid-model.yaml"
simple_example = _EXAMPLES / "sheet2cdf-transformation-rules.xlsx"
source_to_solution_mapping = _EXAMPLES / "source-to-solution-mapping-rules.xlsx"
nordic44 = _EXAMPLES / "Rules-Nordic44-to-TNT.xlsx"
nordic44 = _EXAMPLES / "Rules-Nordic44.xlsx"
nordic44_graphql = _EXAMPLES / "Rules-Nordic44-to-graphql.xlsx"
skos = _EXAMPLES / "skos-rules.xlsx"
wind_energy_ontology = _EXAMPLES / "wind-energy.owl"
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ steps:
pos_y: 77
- complex_configs: {}
configs:
file_name: Rules-Nordic44-to-TNT.xlsx
file_name: Rules-Nordic44.xlsx
validation_report_file: rules_validation_report.txt
validation_report_storage_dir: rules_validation_report
version: ""
Expand Down
14 changes: 11 additions & 3 deletions cognite/neat/rules/exporters/_rules2excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,15 @@ def export(self, rules: Rules) -> Workbook:

return workbook

def _write_sheets(self, workbook: Workbook, dumped_rules: dict[str, Any], rules: Rules, sheet_prefix: str = ""):
def _write_sheets(
self,
workbook: Workbook,
dumped_rules: dict[str, Any],
rules: Rules,
sheet_prefix: str = "",
):
for sheet_name, headers in rules.headers_by_sheet(by_alias=True).items():
if sheet_name in ("Metadata", "prefixes", "Reference", "Last"):
if sheet_name in ("Metadata", "Prefixes", "Reference", "Last"):
continue
sheet = workbook.create_sheet(f"{sheet_prefix}{sheet_name}")

Expand Down Expand Up @@ -273,7 +279,9 @@ def create(self, metadata: DomainMetadata | InformationMetadata | DMSMetadata) -

new_metadata = self._create_new_info(now)
if isinstance(metadata, DMSMetadata):
from cognite.neat.rules.models.information._converter import _InformationRulesConverter
from cognite.neat.rules.models.information._converter import (
_InformationRulesConverter,
)

output_metadata: DMSMetadata | InformationMetadata = _InformationRulesConverter._convert_metadata_to_dms(
new_metadata
Expand Down
20 changes: 13 additions & 7 deletions cognite/neat/rules/importers/_inference2rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def to_rules(
) -> tuple[Rules | None, IssueList]: ...

def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
self,
errors: Literal["raise", "continue"] = "continue",
role: RoleTypes | None = None,
) -> tuple[Rules | None, IssueList] | Rules:
"""
Creates `Rules` object from the data for target role.
Expand All @@ -103,9 +105,6 @@ def to_rules(

rules_dict = self._to_rules_components()

# adding additional prefix
rules_dict["prefixes"][rules_dict["metadata"]["prefix"]] = rules_dict["metadata"]["namespace"]

with _handle_issues(self.issue_list) as future:
rules: InformationRules
rules = InformationRulesInput.load(rules_dict).as_rules()
Expand Down Expand Up @@ -134,7 +133,10 @@ def _to_rules_components(
"""
classes: dict[str, dict] = {}
properties: dict[str, dict] = {}
prefixes: dict[str, Namespace] = PREFIXES
prefixes: dict[str, Namespace] = PREFIXES.copy()

# Adds default namespace to prefixes
prefixes[self._default_metadata().prefix] = self._default_metadata().namespace

# Infers all the classes in the graph
for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
Expand Down Expand Up @@ -246,7 +248,7 @@ def _default_metadata(cls):
created=datetime.now(),
updated=datetime.now(),
description="Inferred model from knowledge graph",
prefix="neat",
prefix="inferred",
namespace=DEFAULT_NAMESPACE,
)

Expand All @@ -262,6 +264,10 @@ def _update_value_type_occurrence_in_comment(cls, value_type: str, comment: str)
def _read_value_type_occurrence_from_comment(cls, value_type: str, comment: str) -> int:
return int(
cast(
re.Match, re.search(rf"with value type <{value_type}> which occurs <(\d+)> times in the graph", comment)
re.Match,
re.search(
rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
comment,
),
).group(1)
)
52 changes: 46 additions & 6 deletions cognite/neat/rules/issues/spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from cognite.client.data_classes import data_modeling as dm
from cognite.client.data_classes.data_modeling import ContainerId, ViewId
from pydantic_core import ErrorDetails
from rdflib import Namespace

from cognite.neat.issues import MultiValueError
from cognite.neat.utils.spreadsheet import SpreadsheetRead
Expand All @@ -23,6 +24,7 @@
"InvalidRowError",
"InvalidPropertyError",
"InvalidClassError",
"PrefixNamespaceCollisionError",
"InvalidContainerError",
"InvalidViewError",
"InvalidRowUnknownSheetError",
Expand All @@ -44,13 +46,18 @@ class InvalidSheetError(NeatValidationError, ABC):
@classmethod
@abstractmethod
def from_pydantic_error(
cls, error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead] | None = None
cls,
error: ErrorDetails,
read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
) -> Self:
raise NotImplementedError

@classmethod
def from_pydantic_errors(
cls, errors: list[ErrorDetails], read_info_by_sheet: dict[str, SpreadsheetRead] | None = None, **kwargs: Any
cls,
errors: list[ErrorDetails],
read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
**kwargs: Any,
) -> "list[NeatValidationError]":
output: list[NeatValidationError] = []
for error in errors:
Expand Down Expand Up @@ -100,16 +107,26 @@ class InvalidRowError(InvalidSheetError, ABC):
def __lt__(self, other: object) -> bool:
if not isinstance(other, InvalidRowError):
return NotImplemented
return (self.sheet_name, self.row, self.column) < (other.sheet_name, other.row, other.column)
return (self.sheet_name, self.row, self.column) < (
other.sheet_name,
other.row,
other.column,
)

def __eq__(self, other: object) -> bool:
if not isinstance(other, InvalidRowError):
return NotImplemented
return (self.sheet_name, self.row, self.column) == (other.sheet_name, other.row, other.column)
return (self.sheet_name, self.row, self.column) == (
other.sheet_name,
other.row,
other.column,
)

@classmethod
def from_pydantic_error(
cls, error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead] | None = None
cls,
error: ErrorDetails,
read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
) -> Self:
sheet_name, _, row, column, *__ = error["loc"]
reader = (read_info_by_sheet or {}).get(str(sheet_name), SpreadsheetRead())
Expand Down Expand Up @@ -173,7 +190,9 @@ class InvalidRowUnknownSheetError(InvalidRowError):

@classmethod
def from_pydantic_error(
cls, error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead] | None = None
cls,
error: ErrorDetails,
read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
) -> Self:
sheet_name, _, row, column, *__ = error["loc"]
reader = (read_info_by_sheet or {}).get(str(sheet_name), SpreadsheetRead())
Expand Down Expand Up @@ -294,6 +313,27 @@ def message(self) -> str:
return f"Parent classes {', '.join(self.classes[0])} are not defined. This may be a mistake."


@dataclass(frozen=True)
class PrefixNamespaceCollisionError(NeatValidationError):
    """Validation error carrying namespaces that are bound to more than one prefix.

    Attributes are documented inline; `dump` extends the parent's dump with
    both lists and `message` renders them as a single human-readable sentence.
    """

    description = "Same namespaces are assigned to different prefixes."
    fix = "Make sure that each unique namespace is assigned to a unique prefix"

    # Namespaces that were found under several prefixes.
    namespaces: list[Namespace]
    # Prefixes involved in the collision.
    prefixes: list[str]

    def dump(self) -> dict[str, list[str]]:
        """Return the parent's dump extended with the `prefixes` and `namespaces` entries."""
        dumped = super().dump()
        dumped.update(prefixes=self.prefixes, namespaces=self.namespaces)
        return dumped

    def message(self) -> str:
        """Return a one-line summary listing the colliding namespaces and the impacted prefixes."""
        joined_namespaces = ", ".join(self.namespaces)
        joined_prefixes = ", ".join(self.prefixes)
        return f"Namespaces {joined_namespaces} are assigned multiple times. Impacted prefixes: {joined_prefixes}."


@dataclass(frozen=True)
class ValueTypeNotDefinedError(NeatValidationError):
description = "Value types referred by properties are not defined in Rules."
Expand Down
4 changes: 3 additions & 1 deletion cognite/neat/rules/models/information/_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,16 @@ class InformationRules(BaseRules):
metadata: InformationMetadata = Field(alias="Metadata")
properties: SheetList[InformationProperty] = Field(alias="Properties")
classes: SheetList[InformationClass] = Field(alias="Classes")
prefixes: dict[str, Namespace] = Field(default_factory=lambda: PREFIXES.copy())
prefixes: dict[str, Namespace] = Field(default_factory=lambda: PREFIXES.copy(), alias="Prefixes")
last: "InformationRules | None" = Field(None, alias="Last")
reference: "InformationRules | None" = Field(None, alias="Reference")

@field_validator("prefixes", mode="before")
def parse_str(cls, values: Any) -> Any:
    """Normalise the raw `prefixes` input before field validation.

    - `None` is replaced by a fresh copy of the default `PREFIXES` mapping.
    - A dict has every string value wrapped in `Namespace`; non-string
      values are kept as they are.
    - Any other input is returned unchanged.
    """
    if values is None:
        return PREFIXES.copy()
    if not isinstance(values, dict):
        return values
    return {
        prefix: (Namespace(namespace) if isinstance(namespace, str) else namespace)
        for prefix, namespace in values.items()
    }

@model_validator(mode="after")
Expand Down
Loading
Loading