[NEAT-36] Sanitize Errors. (#287)
* refactor: setup shell for issues

* refactor: Finish signature of importers with errors returned

* refactor: started adding errors

* refactor: fix overload signatures

* refactor: moved properties and renaming

* refactor: setup all basic exceptions

* refactor: fix imports and move test

* fix: wrong init

* tests: spreadsheet not existing

* tests: setup failing test for property

* refactor: setup mechanism for catching pydantic errors

* refactor: setup invalid data classes

* tests: Finished test

* refactor: better parsing

* refactor: correct location of error

* tests: generalized tests

* tests: Upgraded all tests

* refactor: restructure to handle all content errors

* refactor: introduce validation package

* refactor: Moved all errors into validation package

* refactor: plural name

* fix: parsing of inconsistent container definitions

* refactor: catching MultiValueError

* refactor: handle inconsistent container error

* refactor: make private

* fix: missing import

* refactor: as_errors implementation

* refactor: added missing messages
doctrino authored Mar 3, 2024
1 parent 5560b43 commit 527c84e
Showing 29 changed files with 981 additions and 255 deletions.
1 change: 0 additions & 1 deletion cognite/neat/rules/exceptions.py
@@ -10,7 +10,6 @@
- 400 - 499: errors and warnings raised when dealing TransformationRules exporters
"""

from typing import Any

from cognite.client.data_classes.data_modeling import ContainerId, DataModelId, ViewId
5 changes: 3 additions & 2 deletions cognite/neat/rules/exporters/__init__.py
@@ -1,3 +1,4 @@
from ._rules2dms import DMSExporter
from ._rules2dms import CDFExporter, DMSExporter
from ._rules2ontology import GraphExporter, OWLExporter, SemanticDataModelExporter, SHACLExporter

__all__ = ["DMSExporter"]
__all__ = ["DMSExporter", "CDFExporter", "SemanticDataModelExporter", "OWLExporter", "GraphExporter", "SHACLExporter"]
20 changes: 18 additions & 2 deletions cognite/neat/rules/importers/_base.py
@@ -1,19 +1,35 @@
import getpass
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Literal, TypeAlias, overload

from rdflib import Namespace

from cognite.neat.rules.models._rules import DMSRules, DomainRules, InformationRules
from cognite.neat.rules.models._rules import DMSRules, DomainRules, InformationRules, RoleTypes
from cognite.neat.rules.validation import IssueList

Rule: TypeAlias = DomainRules | InformationRules | DMSRules


class BaseImporter(ABC):
"""
BaseImporter class which all importers inherit from.
"""

@overload
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> Rule:
...

@overload
def to_rules(
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList]:
...

@abstractmethod
def to_rules(self) -> DomainRules | InformationRules | DMSRules:
def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList] | Rule:
"""
Creates `Rules` object from the data for target role.
"""
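A minimal sketch of how a caller might consume the new two-mode signature; the helper below is illustrative and not part of the diff, and works against any concrete importer subclass:

# Sketch: `importer` is any concrete BaseImporter subclass.
def load_rules(importer: BaseImporter, strict: bool = False) -> Rule | None:
    if strict:
        # errors="raise" returns the rules directly, or raises the collected issues.
        return importer.to_rules(errors="raise")
    # errors="continue" (the default) returns (rules-or-None, IssueList).
    rules, issues = importer.to_rules(errors="continue")
    for issue in issues:
        print(issue)
    return rules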
21 changes: 18 additions & 3 deletions cognite/neat/rules/importers/_dms2rules.py
@@ -1,10 +1,10 @@
from typing import cast
from typing import Literal, cast, overload

from cognite.client import data_modeling as dm
from cognite.client.data_classes.data_modeling.containers import BTreeIndex, InvertedIndex
from cognite.client.data_classes.data_modeling.data_types import ListablePropertyType

from cognite.neat.rules.models._rules import DMSRules, DMSSchema
from cognite.neat.rules.models._rules import DMSRules, DMSSchema, RoleTypes
from cognite.neat.rules.models._rules._types import ContainerEntity, DMSValueType, ViewEntity
from cognite.neat.rules.models._rules.dms_architect_rules import (
DMSContainer,
@@ -13,6 +13,7 @@
DMSView,
SheetList,
)
from cognite.neat.rules.validation import IssueList

from ._base import BaseImporter

@@ -21,7 +22,21 @@ class DMSImporter(BaseImporter):
def __init__(self, schema: DMSSchema):
self.schema = schema

def to_rules(self) -> DMSRules:
@overload
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> DMSRules:
...

@overload
def to_rules(
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
) -> tuple[DMSRules | None, IssueList]:
...

def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
) -> tuple[DMSRules | None, IssueList] | DMSRules:
if role is not None and role != RoleTypes.dms_architect:
raise ValueError(f"Role {role} is not supported for DMSImporter")
data_model = self.schema.data_models[0]

container_by_id = {container.as_id(): container for container in self.schema.containers}
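A short usage sketch of the importer above; building the DMSSchema is out of scope here, so `schema` is a placeholder:

importer = DMSImporter(schema)                 # schema: an already-loaded DMSSchema
rules, issues = importer.to_rules()            # errors="continue" by default
dms_rules = importer.to_rules(errors="raise", role=RoleTypes.dms_architect)
# Any other role is rejected up front with ValueError, per the check above.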
32 changes: 25 additions & 7 deletions cognite/neat/rules/importers/_owl2rules/_owl2rules.py
@@ -4,12 +4,14 @@
# TODO: if this module grows too big, split it into several files and place under ./converter directory

from pathlib import Path
from typing import Literal, overload

from rdflib import DC, DCTERMS, OWL, RDF, RDFS, SKOS, Graph

from cognite.neat.rules.importers._base import BaseImporter
from cognite.neat.rules.models._rules import InformationRules
from cognite.neat.rules.models._rules import InformationRules, RoleTypes
from cognite.neat.rules.models.value_types import XSD_VALUE_TYPE_MAPPINGS
from cognite.neat.rules.validation import IssueList

from ._owl2classes import parse_owl_classes
from ._owl2metadata import parse_owl_metadata
@@ -36,10 +38,26 @@ class OWLImporter(BaseImporter):
"""

def __init__(self, owl_filepath: Path):
def __init__(self, owl_filepath: Path, make_compliant: bool = True):
self.owl_filepath = owl_filepath
self.make_compliant = make_compliant

@overload
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> InformationRules:
...

@overload
def to_rules(
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
) -> tuple[InformationRules | None, IssueList]:
...

def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
) -> tuple[InformationRules | None, IssueList] | InformationRules:
if role is not None and role != RoleTypes.information_architect:
raise ValueError(f"Role {role} is not supported for OWLImporter")

def to_rules(self, make_compliant: bool = True) -> InformationRules:
graph = Graph()
try:
graph.parse(self.owl_filepath)
@@ -55,12 +73,12 @@ def to_rules(self, make_compliant: bool = True) -> InformationRules:
graph.bind("skos", SKOS)

components = {
"Metadata": parse_owl_metadata(graph, make_compliant=make_compliant),
"Classes": parse_owl_classes(graph, make_compliant=make_compliant),
"Properties": parse_owl_properties(graph, make_compliant=make_compliant),
"Metadata": parse_owl_metadata(graph, make_compliant=self.make_compliant),
"Classes": parse_owl_classes(graph, make_compliant=self.make_compliant),
"Properties": parse_owl_properties(graph, make_compliant=self.make_compliant),
}

if make_compliant:
if self.make_compliant:
components = make_components_compliant(components)

return InformationRules.model_validate(components)
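A hedged sketch of the constructor change above: make_compliant now lives on the importer instead of being passed to to_rules. The ontology path is a placeholder:

importer = OWLImporter(Path("ontology.ttl"), make_compliant=True)  # placeholder path
rules, issues = importer.to_rules()             # collect issues, errors="continue" by default
info_rules = importer.to_rules(errors="raise")  # or fail fast on the first problem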
136 changes: 98 additions & 38 deletions cognite/neat/rules/importers/_spreadsheet2rules.py
@@ -2,72 +2,132 @@
In more details, it traverses the graph and abstracts class and properties, basically
generating a list of rules based on which nodes that form the graph are made.
"""

from collections import defaultdict
from pathlib import Path
from typing import cast
from typing import Literal, cast, overload

import pandas as pd
from pydantic import ValidationError

from cognite.neat.rules.exceptions import MetadataSheetMissingOrFailedValidation
from cognite.neat.rules import validation
from cognite.neat.rules.models._rules import RULES_PER_ROLE, DMSRules, DomainRules, InformationRules
from cognite.neat.rules.models._rules.base import RoleTypes
from cognite.neat.rules.validation import IssueList
from cognite.neat.utils.auxiliary import local_import
from cognite.neat.utils.spreadsheet import read_spreadsheet

from ._base import BaseImporter
from ._base import BaseImporter, Rule


class ExcelImporter(BaseImporter):
def __init__(self, filepath: Path):
self.filepath = filepath

def to_rules(self, role: RoleTypes | None = None) -> DomainRules | InformationRules | DMSRules:
excel_file = pd.ExcelFile(self.filepath)
@overload
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> Rule:
...

@overload
def to_rules(
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList]:
...

def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList] | Rule:
issues = IssueList()
try:
excel_file = pd.ExcelFile(self.filepath)
except FileNotFoundError:
issues.append(validation.SpreadsheetNotFound(self.filepath.name))
if errors == "raise":
raise issues.as_errors() from None
return None, issues

try:
metadata = dict(pd.read_excel(excel_file, "Metadata", header=None).values)
except ValueError as e:
if ...:
raise MetadataSheetMissingOrFailedValidation() from None
else:
raise UserWarning("Metadata sheet is missing or failed validation") from e
except ValueError:
issues.append(validation.MetadataSheetMissingOrFailed())
if errors == "raise":
raise issues.as_errors() from None
return None, issues

role = role or RoleTypes(metadata.get("role", RoleTypes.domain_expert))
role_enum = RoleTypes(role)
rules_model = cast(DomainRules | InformationRules | DMSRules, RULES_PER_ROLE[role_enum])
sheet_names = {str(name).lower() for name in excel_file.sheet_names}
rules_model = RULES_PER_ROLE[role_enum]
sheet_names = {str(name) for name in excel_file.sheet_names}
expected_sheet_names = rules_model.mandatory_fields(use_alias=True)

if missing_sheets := expected_sheet_names.difference(sheet_names):
issues.append(validation.SpreadsheetMissing(list(missing_sheets)))
if errors == "raise":
raise issues.as_errors()
return None, issues

sheets: dict[str, dict | list] = {"Metadata": metadata}
header_row_no_by_sheet: dict[str, int] = defaultdict(int)
for sheet_name, headers in [
("Properties", "Class"),
("Classes", "Class"),
("Containers", "Container"),
("Views", "View"),
]:
if sheet_name in excel_file.sheet_names:
try:
sheets[sheet_name], header_row_no_by_sheet[sheet_name] = read_spreadsheet(
excel_file, sheet_name, return_header_row=True, expected_headers=[headers]
)
except Exception as e:
issues.append(validation.ReadSpreadsheets(str(e)))
continue
if issues:
if errors == "raise":
raise issues.as_errors()
return None, issues

rules_cls = {
RoleTypes.domain_expert: DomainRules,
RoleTypes.information_architect: InformationRules,
RoleTypes.dms_architect: DMSRules,
}.get(role_enum)
if not rules_cls:
issues.append(validation.InvalidRole(str(role)))
if errors == "raise":
raise issues.as_errors()
return None, issues

if missing_sheets := rules_model.mandatory_fields().difference(sheet_names):
raise ValueError(f"Missing mandatory sheets: {missing_sheets}")
try:
rules = rules_cls.model_validate(sheets) # type: ignore[attr-defined]
except ValidationError as e:
issues.extend(validation.InvalidSheetContent.from_pydantic_errors(e.errors(), header_row_no_by_sheet))
if errors == "raise":
raise issues.as_errors() from e
return None, issues

sheets = {
"Metadata": metadata,
"Properties": read_spreadsheet(excel_file, "Properties", ["Class"]),
"Classes": (
read_spreadsheet(excel_file, "Classes", ["Class"]) if "Classes" in excel_file.sheet_names else None
),
"Containers": (
read_spreadsheet(excel_file, "Containers", ["Container"])
if "Containers" in excel_file.sheet_names
else None
),
"Views": (read_spreadsheet(excel_file, "Views", ["View"]) if "Views" in excel_file.sheet_names else None),
}
if role_enum is RoleTypes.domain_expert:
return rules_model.model_validate(sheets)
elif role_enum is RoleTypes.information_architect:
return rules_model.model_validate(sheets)
elif role_enum is RoleTypes.dms_architect:
return rules_model.model_validate(sheets)
else:
raise ValueError(f"Role {role} is not valid.")
if errors == "raise":
return rules
return rules, issues


class GoogleSheetImporter(BaseImporter):
def __init__(self, sheet_id: str):
def __init__(self, sheet_id: str, skiprows: int = 1):
self.sheet_id = sheet_id
self.skiprows = skiprows

@overload
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> Rule:
...

@overload
def to_rules(
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList]:
...

def to_rules(self, role: RoleTypes | None = None, skiprows: int = 1) -> DomainRules | InformationRules | DMSRules:
def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList] | Rule:
local_import("gspread", "google")
import gspread # type: ignore[import]

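The rewritten ExcelImporter follows a collect-then-decide pattern: every problem is appended to an IssueList and either raised as a group via as_errors() or handed back to the caller. A minimal sketch, with a placeholder workbook path:

importer = ExcelImporter(Path("rules.xlsx"))          # placeholder path
rules, issues = importer.to_rules(errors="continue")
if issues:
    # Missing files, missing sheets and pydantic errors all surface here as
    # validation issues instead of ad-hoc exceptions or UserWarnings.
    for issue in issues:
        print(issue)
if rules is not None:
    ...  # DomainRules, InformationRules or DMSRules, depending on the Metadata role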
2 changes: 1 addition & 1 deletion cognite/neat/rules/models/_rules/__init__.py
@@ -4,7 +4,7 @@
from .domain_rules import DomainRules
from .information_rules import InformationRules

RULES_PER_ROLE = {
RULES_PER_ROLE: dict[RoleTypes, type[DomainRules] | type[InformationRules] | type[DMSRules]] = {
RoleTypes.domain_expert: DomainRules,
RoleTypes.information_architect: InformationRules,
RoleTypes.dms_architect: DMSRules,
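With the annotation above, a role lookup now resolves to a rules class for type checkers; a small sketch of how the mapping is consumed by the importers:

rules_cls = RULES_PER_ROLE[RoleTypes.information_architect]
expected_sheets = rules_cls.mandatory_fields(use_alias=True)  # as ExcelImporter does above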
7 changes: 6 additions & 1 deletion cognite/neat/rules/models/_rules/_types/_value.py
@@ -7,6 +7,7 @@
Boolean,
Date,
FileReference,
Float32,
Float64,
Int32,
Int64,
@@ -61,6 +62,9 @@ def graphql(self) -> str:
class DMSValueType(XSDValueType):
type_: ClassVar[EntityTypes] = EntityTypes.dms_value_type

def __str__(self) -> str:
return self.dms._type


_DATA_TYPES: list[dict] = [
{"name": "boolean", "python": bool, "GraphQL": "Boolean", "dms": Boolean},
@@ -89,7 +93,8 @@ class DMSValueType(XSDValueType):

_DMS_TYPES: list[dict] = [
{"name": "boolean", "python": bool, "GraphQL": "Boolean", "dms": Boolean},
{"name": "float", "python": float, "GraphQL": "Float", "dms": Float64},
{"name": "float", "python": float, "GraphQL": "Float", "dms": Float32},
{"name": "double", "python": float, "GraphQL": "Float", "dms": Float64},
{"name": "integer", "python": int, "GraphQL": "Int", "dms": Int32},
{"name": "long", "python": int, "GraphQL": "Int", "dms": Int64},
{"name": "string", "python": str, "GraphQL": "String", "dms": Text},
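The table change above splits 32-bit and 64-bit floats: DMS "float" now maps to Float32 and a new "double" entry carries Float64. A small illustrative check (reading the private _DMS_TYPES table is an assumption made only for demonstration):

by_name = {entry["name"]: entry["dms"] for entry in _DMS_TYPES}
assert by_name["float"] is Float32   # was Float64 before this commit
assert by_name["double"] is Float64  # new entry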