Skip to content

Commit

Permalink
[NEAT-50] HTML report formatter (#289)
Browse files Browse the repository at this point in the history
* refactor: set up basic shell for formatter

* refactor; dump and to_pandas method

* refactor; added basic formatter

* feat: setup step to use formatter

* refactor: set basic formatter in Validate workflow

* refactor: setup message

* refactor: title on issues list

* refactor: added validation of schema

* refactor: fix bug in validate schema

* refactor; update export DMS workflow
  • Loading branch information
doctrino authored Mar 4, 2024
1 parent cb27906 commit 2281c7c
Show file tree
Hide file tree
Showing 12 changed files with 329 additions and 13 deletions.
2 changes: 1 addition & 1 deletion cognite/neat/rules/importers/_spreadsheet2rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def to_rules(
def to_rules(
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
) -> tuple[Rule | None, IssueList] | Rule:
issues = IssueList()
issues = IssueList(title=f"'{self.filepath.name}'")
try:
excel_file = pd.ExcelFile(self.filepath)
except FileNotFoundError:
Expand Down
11 changes: 11 additions & 0 deletions cognite/neat/rules/models/_rules/dms_architect_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,17 @@ def consistent_container_properties(self) -> "DMSRules":
raise validation.MultiValueError(errors)
return self

@model_validator(mode="after")
def validate_schema(self) -> "DMSRules":
    """Validate the schema built from these rules when it is declared complete.

    Returns:
        The rules instance itself.

    Raises:
        validation.MultiValueError: If validating the schema returned by
            ``as_schema`` reports any errors.
    """
    # Rules not flagged as complete are passed through untouched.
    if self.metadata.schema_ is SchemaCompleteness.complete:
        found = self.as_schema().validate()
        if found:
            raise validation.MultiValueError(found)
    return self

def as_schema(self) -> DMSSchema:
    """Convert these rules to a ``DMSSchema`` using ``_DMSExporter``."""
    exporter = _DMSExporter(self)
    return exporter.to_schema()

Expand Down
27 changes: 23 additions & 4 deletions cognite/neat/rules/validation/_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import sys
from abc import ABC
from abc import ABC, abstractmethod
from collections import UserList
from collections.abc import Sequence
from dataclasses import dataclass
from typing import ClassVar
from typing import Any, ClassVar

import pandas as pd

if sys.version_info < (3, 11):
from exceptiongroup import ExceptionGroup
Expand All @@ -23,24 +25,41 @@ def message(self) -> str:
"""
return self.description

@abstractmethod
def dump(self) -> dict[str, Any]:
    """Return a dictionary representation of the issue.

    Subclasses must override this method; the base implementation always
    raises ``NotImplementedError``.
    """
    raise NotImplementedError


@dataclass(frozen=True, order=True)
class Error(ValidationIssue, ABC):
    """Base class for validation issues that are errors."""

    def dump(self) -> dict[str, Any]:
        """Return a dictionary with the concrete class name under the ``"error"`` key."""
        class_name = type(self).__name__
        return {"error": class_name}


@dataclass(frozen=True, order=True)
class ValidationWarning(ValidationIssue, ABC):
    """Base class for validation issues that are warnings."""

    def dump(self) -> dict[str, Any]:
        """Return a dictionary with the concrete class name under the ``"warning"`` key."""
        class_name = type(self).__name__
        return {"warning": class_name}


class IssueList(UserList[ValidationIssue]):
    """A list of validation issues with an optional title."""

    def __init__(self, issues: Sequence[ValidationIssue] | None = None, title: str | None = None):
        """Create the list from ``issues`` (empty when omitted) and store ``title``."""
        initial = issues if issues else []
        super().__init__(initial)
        self.title = title

    def as_errors(self) -> ExceptionGroup:
        """Wrap the message of every ``Error`` in the list in a ``ValueError`` and group them.

        Issues that are not instances of ``Error`` are left out.

        NOTE(review): ``ExceptionGroup`` rejects an empty exception sequence, so
        this presumably must only be called when the list holds at least one
        ``Error`` -- confirm against callers.
        """
        failures = []
        for issue in self:
            if isinstance(issue, Error):
                failures.append(ValueError(issue.message()))
        return ExceptionGroup("Validation failed", failures)

    def to_pandas(self) -> pd.DataFrame:
        """Return a DataFrame with one row per issue, built from each issue's ``dump()``."""
        rows = [issue.dump() for issue in self]
        return pd.DataFrame(rows)

    def _repr_html_(self) -> str | None:
        """Return the HTML representation of the DataFrame produced by ``to_pandas``."""
        frame = self.to_pandas()
        return frame._repr_html_()  # type: ignore[operator]


class MultiValueError(ValueError):
"""This is a container for multiple errors.
Expand Down
42 changes: 42 additions & 0 deletions cognite/neat/rules/validation/_container_inconsistency.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC
from dataclasses import dataclass
from typing import Any

from cognite.client import data_modeling as dm

Expand All @@ -14,6 +15,17 @@ class InconsistentContainerDefinition(Error, ABC):
property_name: str
row_numbers: set[int]

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the container, property name and sorted row numbers."""
    dumped = super().dump()
    dumped["container"] = self.container.dump()
    dumped["property_name"] = self.property_name
    # Row numbers are kept in a set; emit them in ascending order.
    dumped["row_numbers"] = sorted(self.row_numbers)
    return dumped


@dataclass(frozen=True, order=True)
class MultiValueTypeDefinitions(InconsistentContainerDefinition):
Expand All @@ -27,6 +39,11 @@ def message(self) -> str:
f"has different value types: {self.value_types}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the sorted ``value_types``."""
    return {**super().dump(), "value_types": sorted(self.value_types)}


@dataclass(frozen=True, order=True)
class MultiValueIsListDefinitions(InconsistentContainerDefinition):
Expand All @@ -40,6 +57,11 @@ def message(self) -> str:
f"has different list definitions: {self.list_definitions}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the sorted ``list_definitions``."""
    return {**super().dump(), "list_definitions": sorted(self.list_definitions)}


@dataclass(frozen=True, order=True)
class MultiNullableDefinitions(InconsistentContainerDefinition):
Expand All @@ -53,6 +75,11 @@ def message(self) -> str:
f"has different nullable definitions: {self.nullable_definitions}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the sorted ``nullable_definitions``."""
    return {**super().dump(), "nullable_definitions": sorted(self.nullable_definitions)}


@dataclass(frozen=True, order=True)
class MultiDefaultDefinitions(InconsistentContainerDefinition):
Expand All @@ -66,6 +93,11 @@ def message(self) -> str:
f"has different default definitions: {self.default_definitions}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with ``default_definitions``, stored as-is without sorting."""
    return {**super().dump(), "default_definitions": self.default_definitions}


@dataclass(frozen=True, order=True)
class MultiIndexDefinitions(InconsistentContainerDefinition):
Expand All @@ -79,6 +111,11 @@ def message(self) -> str:
f"has different index definitions: {self.index_definitions}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the sorted ``index_definitions``."""
    return {**super().dump(), "index_definitions": sorted(self.index_definitions)}


@dataclass(frozen=True, order=True)
class MultiUniqueConstraintDefinitions(InconsistentContainerDefinition):
Expand All @@ -91,3 +128,8 @@ def message(self) -> str:
f"{self.container}.{self.property_name} defined in rows: {sorted(self.row_numbers)} "
f"has different unique constraint definitions: {self.unique_constraint_definitions}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the sorted ``unique_constraint_definitions``."""
    return {
        **super().dump(),
        "unique_constraint_definitions": sorted(self.unique_constraint_definitions),
    }
62 changes: 61 additions & 1 deletion cognite/neat/rules/validation/_dms_schema_errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABC
from dataclasses import dataclass
from functools import total_ordering
from typing import ClassVar
from typing import Any, ClassVar

from cognite.client.data_classes import data_modeling as dm

Expand Down Expand Up @@ -32,6 +32,12 @@ class MissingSpace(DMSSchemaError):
def message(self) -> str:
    """Return a sentence naming the missing space and what refers to it."""
    text = f"The space {self.space} referred to by {self.referred_by} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``space`` and ``referred_by`` attributes."""
    dumped = super().dump()
    dumped.update({"space": self.space, "referred_by": self.referred_by})
    return dumped


@dataclass(frozen=True)
class MissingContainer(DMSSchemaError):
Expand All @@ -44,6 +50,12 @@ class MissingContainer(DMSSchemaError):
def message(self) -> str:
    """Return a sentence naming the missing container and what refers to it."""
    text = f"The container {self.container} referred to by {self.referred_by} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``container`` and ``referred_by`` attributes."""
    dumped = super().dump()
    dumped.update({"container": self.container, "referred_by": self.referred_by})
    return dumped


@dataclass(frozen=True)
class MissingContainerProperty(DMSSchemaError):
Expand All @@ -60,6 +72,13 @@ def message(self) -> str:
f"does not exist in {self.referred_by}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with ``container``, ``property`` and ``referred_by``."""
    dumped = super().dump()
    dumped.update(
        {
            "container": self.container,
            "property": self.property,
            "referred_by": self.referred_by,
        }
    )
    return dumped


@dataclass(frozen=True)
class MissingView(DMSSchemaError):
Expand All @@ -72,6 +91,12 @@ class MissingView(DMSSchemaError):
def message(self) -> str:
    """Return a sentence naming the missing view and what refers to it."""
    text = f"The view {self.view} referred to by {self.referred_by} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``view`` and ``referred_by`` attributes."""
    dumped = super().dump()
    dumped.update({"view": self.view, "referred_by": self.referred_by})
    return dumped


@dataclass(frozen=True)
class MissingParentView(MissingView):
Expand All @@ -83,6 +108,11 @@ class MissingParentView(MissingView):
def message(self) -> str:
    """Return a sentence stating that the parent view of ``referred_by`` is missing."""
    text = f"The parent view referred to by {self.referred_by} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Return the parent dump with ``referred_by`` set from this instance."""
    return {**super().dump(), "referred_by": self.referred_by}


@dataclass(frozen=True)
class MissingSourceView(MissingView):
Expand All @@ -95,6 +125,11 @@ class MissingSourceView(MissingView):
def message(self) -> str:
    """Return a sentence naming the view property whose source view is missing."""
    text = f"The source view referred to by {self.referred_by}.{self.property} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``property`` attribute."""
    return {**super().dump(), "property": self.property}


@dataclass(frozen=True)
class MissingEdgeView(MissingView):
Expand All @@ -107,6 +142,12 @@ class MissingEdgeView(MissingView):
def message(self) -> str:
    """Return a sentence naming the view property whose edge view is missing."""
    text = f"The edge view referred to by {self.referred_by}.{self.property} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``property`` and ``referred_by`` attributes."""
    dumped = super().dump()
    dumped.update({"property": self.property, "referred_by": self.referred_by})
    return dumped


@dataclass(frozen=True)
class DuplicatedViewInDataModel(DMSSchemaError):
Expand All @@ -119,6 +160,12 @@ class DuplicatedViewInDataModel(DMSSchemaError):
def message(self) -> str:
    """Return a sentence naming the duplicated view and the data model that contains it."""
    text = f"The view {self.view} is duplicated in the DataModel {self.referred_by}"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``referred_by`` and ``view`` attributes."""
    dumped = super().dump()
    dumped.update({"referred_by": self.referred_by, "view": self.view})
    return dumped


@dataclass(frozen=True)
class DirectRelationMissingSource(DMSSchemaError):
Expand All @@ -131,6 +178,12 @@ class DirectRelationMissingSource(DMSSchemaError):
def message(self) -> str:
    """Return a sentence naming the view property whose source view is missing."""
    text = f"The source view referred to by {self.view_id}.{self.property} does not exist"
    return text

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with the ``view_id`` and ``property`` attributes."""
    dumped = super().dump()
    dumped.update({"view_id": self.view_id, "property": self.property})
    return dumped


@dataclass(frozen=True)
class ContainerPropertyUsedMultipleTimes(DMSSchemaError):
Expand All @@ -146,3 +199,10 @@ def message(self) -> str:
f"The container property {self.property} of {self.container} is used multiple times "
f"by the same view {self.referred_by}"
)

def dump(self) -> dict[str, Any]:
    """Extend the parent dump with ``container``, ``property`` and the sorted ``referred_by``."""
    dumped = super().dump()
    dumped.update(
        {
            "container": self.container,
            "property": self.property,
            "referred_by": sorted(self.referred_by),
        }
    )
    return dumped
Loading

0 comments on commit 2281c7c

Please sign in to comment.