cognitedata · nikokaoja · Jul 8, 2024 · Jul 1, 2024 · Jul 1, 2024 · Jul 7, 2024
@@ -1,5 +1,5 @@
 .PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up
-version="0.85.4"
+version="0.85.5"
 run-explorer:
 	@echo "Running explorer API server..."
 	# open "http://localhost:8000/static/index.html" || true

@@ -1 +1 @@
-__version__ = "0.85.4"
+__version__ = "0.85.5"
@@ -22,14 +22,6 @@
     "owl": OWL._NS,
     "xsd": XSD._NS,
     "pav": Namespace("http://purl.org/pav/"),
-    "cim": Namespace("http://iec.ch/TC57/2013/CIM-schema-cim16#"),
-    "icim": Namespace("http://iec.ch/TC57/2013/CIM-schema-cim16-info#"),
-    "entsoe": Namespace("http://entsoe.eu/CIM/SchemaExtension/3/1#"),
-    "entsoe2": Namespace("http://entsoe.eu/CIM/SchemaExtension/3/2#"),
-    "md": Namespace("http://iec.ch/TC57/61970-552/ModelDescription/1#"),
-    "pti": Namespace("http://www.pti-us.com/PTI_CIM-schema-cim16#"),
-    "tnt": Namespace("http://purl.org/cognite/tnt#"),
-    "neat": DEFAULT_NAMESPACE,
 }
 
 

@@ -82,7 +82,9 @@ def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStore, instance_space
         except Exception as e:
             issues.append(
                 loader_issues.FailedConvertError(
-                    identifier=rules.metadata.as_identifier(), target_format="read DMS model", reason=str(e)
+                    identifier=rules.metadata.as_identifier(),
+                    target_format="read DMS model",
+                    reason=str(e),
                 )
             )
         return cls(graph_store, data_model, instance_space, {}, issues)
@@ -199,7 +201,11 @@ def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list
         return pydantic_cls, edge_by_property, issues
 
     def _create_node(
-        self, identifier: str, properties: dict[str, list[str]], pydantic_cls: type[Model], view_id: dm.ViewId
+        self,
+        identifier: str,
+        properties: dict[str, list[str]],
+        pydantic_cls: type[Model],
+        view_id: dm.ViewId,
     ) -> dm.InstanceApply:
         created = pydantic_cls.model_validate(properties)
 
@@ -233,7 +239,7 @@ def _create_edges(
                 external_id = f"{identifier}.{prop}.{target}"
                 yield dm.EdgeApply(
                     space=self.instance_space,
-                    external_id=external_id if len(external_id) < 256 else create_sha256_hash(external_id),
+                    external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
                     type=edge.type,
                     start_node=dm.DirectRelationReference(self.instance_space, identifier),
                     end_node=dm.DirectRelationReference(self.instance_space, target),
@@ -290,5 +296,7 @@ def _triples2dictionary(
     """Converts list of triples to dictionary"""
     values_by_property_by_identifier: dict[str, dict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
     for id_, property_, value in triples:
-        values_by_property_by_identifier[id_][property_].append(value)
+        # skip empty strings and textual placeholders for missing data (compared case-insensitively)
+        if value.lower() not in ("", "none", "nan", "null"):
+            values_by_property_by_identifier[id_][property_].append(value)
     return values_by_property_by_identifier
@@ -5,3 +5,6 @@
 # https://github.com/pydata/xarray/blob/main/xarray/tutorial.py
 # Currently there are simple paths to the examples which are then easily loaded in the notebooks
 nordic44_knowledge_graph = Path(__file__).parent / "Knowledge-Graph-Nordic44.xml"
+nordic44_knowledge_graph_dirty = (
+    Path(__file__).parent / "Knowledge-Graph-Nordic44-dirty.xml"
+)
@@ -3,7 +3,7 @@
 
 from rdflib import Namespace
 
-from cognite.neat.constants import PREFIXES
+from cognite.neat.constants import DEFAULT_NAMESPACE
 from cognite.neat.graph.extractors._dexpi import DexpiExtractor
 from cognite.neat.legacy.graph.models import Triple
 
@@ -31,7 +31,7 @@ def __init__(
         base_namespace: str | None = None,
     ):
         self.filepath = Path(filepath)
-        self.namespace = Namespace(base_namespace) if isinstance(base_namespace, str | Namespace) else PREFIXES["neat"]
+        self.namespace = Namespace(base_namespace) if isinstance(base_namespace, str | Namespace) else DEFAULT_NAMESPACE
 
     def extract(self) -> set[Triple]:
         """

@@ -271,7 +271,7 @@ def _class2asset_instance(
     if "name" in remapped_class_instance and remapped_class_instance["name"] == "":
         remapped_class_instance["name"] = empty_name_default
     # To maintain shape across of all assets of specific type we are adding missing metadata
-    # keys as empty strings, this was request by Statnett
+    # keys as empty strings; this was requested by a customer
     # Generally this is bad practice, but more of a workaround of their bad data
     if missing_metadata and add_missing_metadata:
         msg = f"Adding missing metadata keys with values set to empty string for {class_}"

@@ -12,7 +12,7 @@
 power_grid_data_model = _EXAMPLES / "power-grid-model.yaml"
 simple_example = _EXAMPLES / "sheet2cdf-transformation-rules.xlsx"
 source_to_solution_mapping = _EXAMPLES / "source-to-solution-mapping-rules.xlsx"
-nordic44 = _EXAMPLES / "Rules-Nordic44-to-TNT.xlsx"
+nordic44 = _EXAMPLES / "Rules-Nordic44.xlsx"
 nordic44_graphql = _EXAMPLES / "Rules-Nordic44-to-graphql.xlsx"
 skos = _EXAMPLES / "skos-rules.xlsx"
 wind_energy_ontology = _EXAMPLES / "wind-energy.owl"
@@ -24,7 +24,7 @@ steps:
       pos_y: 77
   - complex_configs: {}
     configs:
-      file_name: Rules-Nordic44-to-TNT.xlsx
+      file_name: Rules-Nordic44.xlsx
       validation_report_file: rules_validation_report.txt
       validation_report_storage_dir: rules_validation_report
       version: ""

@@ -143,9 +143,15 @@ def export(self, rules: Rules) -> Workbook:
 
         return workbook
 
-    def _write_sheets(self, workbook: Workbook, dumped_rules: dict[str, Any], rules: Rules, sheet_prefix: str = ""):
+    def _write_sheets(
+        self,
+        workbook: Workbook,
+        dumped_rules: dict[str, Any],
+        rules: Rules,
+        sheet_prefix: str = "",
+    ):
         for sheet_name, headers in rules.headers_by_sheet(by_alias=True).items():
-            if sheet_name in ("Metadata", "prefixes", "Reference", "Last"):
+            if sheet_name in ("Metadata", "Prefixes", "Reference", "Last"):
                 continue
             sheet = workbook.create_sheet(f"{sheet_prefix}{sheet_name}")
 
@@ -273,7 +279,9 @@ def create(self, metadata: DomainMetadata | InformationMetadata | DMSMetadata) -
 
         new_metadata = self._create_new_info(now)
         if isinstance(metadata, DMSMetadata):
-            from cognite.neat.rules.models.information._converter import _InformationRulesConverter
+            from cognite.neat.rules.models.information._converter import (
+                _InformationRulesConverter,
+            )
 
             output_metadata: DMSMetadata | InformationMetadata = _InformationRulesConverter._convert_metadata_to_dms(
                 new_metadata

@@ -91,7 +91,9 @@ def to_rules(
     ) -> tuple[Rules | None, IssueList]: ...
 
     def to_rules(
-        self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
+        self,
+        errors: Literal["raise", "continue"] = "continue",
+        role: RoleTypes | None = None,
     ) -> tuple[Rules | None, IssueList] | Rules:
         """
         Creates `Rules` object from the data for target role.
@@ -103,9 +105,6 @@ def to_rules(
 
         rules_dict = self._to_rules_components()
 
-        # adding additional prefix
-        rules_dict["prefixes"][rules_dict["metadata"]["prefix"]] = rules_dict["metadata"]["namespace"]
-
         with _handle_issues(self.issue_list) as future:
             rules: InformationRules
             rules = InformationRulesInput.load(rules_dict).as_rules()
@@ -134,7 +133,10 @@ def _to_rules_components(
         """
         classes: dict[str, dict] = {}
         properties: dict[str, dict] = {}
-        prefixes: dict[str, Namespace] = PREFIXES
+        prefixes: dict[str, Namespace] = PREFIXES.copy()
+
+        # Adds default namespace to prefixes
+        prefixes[self._default_metadata().prefix] = self._default_metadata().namespace
 
         # Infers all the classes in the graph
         for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY):  # type: ignore[misc]
@@ -246,7 +248,7 @@ def _default_metadata(cls):
             created=datetime.now(),
             updated=datetime.now(),
             description="Inferred model from knowledge graph",
-            prefix="neat",
+            prefix="inferred",
             namespace=DEFAULT_NAMESPACE,
         )
 
@@ -262,6 +264,10 @@ def _update_value_type_occurrence_in_comment(cls, value_type: str, comment: str)
     def _read_value_type_occurrence_from_comment(cls, value_type: str, comment: str) -> int:
         return int(
             cast(
-                re.Match, re.search(rf"with value type <{value_type}> which occurs <(\d+)> times in the graph", comment)
+                re.Match,
+                re.search(
+                    rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
+                    comment,
+                ),
             ).group(1)
         )
@@ -7,6 +7,7 @@
 from cognite.client.data_classes import data_modeling as dm
 from cognite.client.data_classes.data_modeling import ContainerId, ViewId
 from pydantic_core import ErrorDetails
+from rdflib import Namespace
 
 from cognite.neat.issues import MultiValueError
 from cognite.neat.utils.spreadsheet import SpreadsheetRead
@@ -23,6 +24,7 @@
     "InvalidRowError",
     "InvalidPropertyError",
     "InvalidClassError",
+    "PrefixNamespaceCollisionError",
     "InvalidContainerError",
     "InvalidViewError",
     "InvalidRowUnknownSheetError",
@@ -44,13 +46,18 @@ class InvalidSheetError(NeatValidationError, ABC):
     @classmethod
     @abstractmethod
     def from_pydantic_error(
-        cls, error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead] | None = None
+        cls,
+        error: ErrorDetails,
+        read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
     ) -> Self:
         raise NotImplementedError
 
     @classmethod
     def from_pydantic_errors(
-        cls, errors: list[ErrorDetails], read_info_by_sheet: dict[str, SpreadsheetRead] | None = None, **kwargs: Any
+        cls,
+        errors: list[ErrorDetails],
+        read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
+        **kwargs: Any,
     ) -> "list[NeatValidationError]":
         output: list[NeatValidationError] = []
         for error in errors:
@@ -100,16 +107,26 @@ class InvalidRowError(InvalidSheetError, ABC):
     def __lt__(self, other: object) -> bool:
         if not isinstance(other, InvalidRowError):
             return NotImplemented
-        return (self.sheet_name, self.row, self.column) < (other.sheet_name, other.row, other.column)
+        return (self.sheet_name, self.row, self.column) < (
+            other.sheet_name,
+            other.row,
+            other.column,
+        )
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, InvalidRowError):
             return NotImplemented
-        return (self.sheet_name, self.row, self.column) == (other.sheet_name, other.row, other.column)
+        return (self.sheet_name, self.row, self.column) == (
+            other.sheet_name,
+            other.row,
+            other.column,
+        )
 
     @classmethod
     def from_pydantic_error(
-        cls, error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead] | None = None
+        cls,
+        error: ErrorDetails,
+        read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
     ) -> Self:
         sheet_name, _, row, column, *__ = error["loc"]
         reader = (read_info_by_sheet or {}).get(str(sheet_name), SpreadsheetRead())
@@ -173,7 +190,9 @@ class InvalidRowUnknownSheetError(InvalidRowError):
 
     @classmethod
     def from_pydantic_error(
-        cls, error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead] | None = None
+        cls,
+        error: ErrorDetails,
+        read_info_by_sheet: dict[str, SpreadsheetRead] | None = None,
     ) -> Self:
         sheet_name, _, row, column, *__ = error["loc"]
         reader = (read_info_by_sheet or {}).get(str(sheet_name), SpreadsheetRead())
@@ -294,6 +313,27 @@ def message(self) -> str:
         return f"Parent classes {', '.join(self.classes[0])} are not defined. This may be a mistake."
 
 
+@dataclass(frozen=True)
+class PrefixNamespaceCollisionError(NeatValidationError):
+    description = "Same namespaces are assigned to different prefixes."
+    fix = "Make sure that each unique namespace is assigned to a unique prefix"
+
+    namespaces: list[Namespace]  # namespaces reported as assigned more than once
+    prefixes: list[str]  # prefixes impacted by the collision
+
+    def dump(self) -> dict[str, list[str]]:
+        output = super().dump()  # start from the parent's dump, then add this error's own fields
+        output["prefixes"] = self.prefixes
+        output["namespaces"] = self.namespaces
+        return output
+
+    def message(self) -> str:  # one-sentence summary naming the namespaces and the impacted prefixes
+        return (
+            f"Namespaces {', '.join(self.namespaces)} are assigned multiple times."
+            f" Impacted prefixes: {', '.join(self.prefixes)}."
+        )
+
+
 @dataclass(frozen=True)
 class ValueTypeNotDefinedError(NeatValidationError):
     description = "Value types referred by properties are not defined in Rules."

@@ -259,14 +259,16 @@ class InformationRules(BaseRules):
     metadata: InformationMetadata = Field(alias="Metadata")
     properties: SheetList[InformationProperty] = Field(alias="Properties")
     classes: SheetList[InformationClass] = Field(alias="Classes")
-    prefixes: dict[str, Namespace] = Field(default_factory=lambda: PREFIXES.copy())
+    prefixes: dict[str, Namespace] = Field(default_factory=lambda: PREFIXES.copy(), alias="Prefixes")
     last: "InformationRules | None" = Field(None, alias="Last")
     reference: "InformationRules | None" = Field(None, alias="Reference")
 
     @field_validator("prefixes", mode="before")
     def parse_str(cls, values: Any) -> Any:
         if isinstance(values, dict):
             return {key: Namespace(value) if isinstance(value, str) else value for key, value in values.items()}
+        elif values is None:
+            values = PREFIXES.copy()
         return values
 
     @model_validator(mode="after")