cognitedata · nikokaoja · Jun 24, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 20, 2024
@@ -1,5 +1,5 @@
 .PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up
-version="0.81.12"
+version="0.82.0"
 run-explorer:
 	@echo "Running explorer API server..."
 	# open "http://localhost:8000/static/index.html" || true

@@ -1 +1 @@
-__version__ = "0.81.12"
+__version__ = "0.82.0"
@@ -2,7 +2,6 @@
 It is a bit ugly and needs some proper refactoring, but it is not a priority at the moment.
 """
 
-import logging
 import random
 import warnings
 from collections import OrderedDict
@@ -87,11 +86,11 @@ def generate_triples(
     stop_on_exception: bool = False,
     allow_isolated_classes: bool = True,
 ) -> list[Triple]:
-    """Generate mock triples based on data model defined transformation rules and desired number
+    """Generate mock triples based on data model defined in rules and desired number
     of class instances
 
     Args:
-        transformation_rules : Transformation rules defining the data model
+        rules : Rules defining the data model
         class_count: Target class count for each class in the ontology
         stop_on_exception: To stop if exception is encountered or not, default is False
         allow_isolated_classes: To allow generation of instances for classes that are not
@@ -107,11 +106,9 @@ def generate_triples(
     if non_existing_classes := set(class_count.keys()) - defined_classes:
         msg = f"Class count contains classes {non_existing_classes} for which properties are not defined in Data Model!"
         if stop_on_exception:
-            logging.error(msg)
             raise ValueError(msg)
         else:
             msg += " These classes will be ignored."
-            logging.warning(msg)
             warnings.warn(msg, stacklevel=2)
             for class_ in non_existing_classes:
                 class_count.pop(class_)
@@ -279,14 +276,12 @@ def _generate_mock_object_property_triples(
     if property_definition.value_type not in instance_ids:
         msg = f"Class {property_definition.value_type} not found in class count! "
         if stop_on_exception:
-            logging.error(msg)
             raise ValueError(msg)
         else:
             msg += (
                 f"Skipping creating triples for property {property_definition.name} "
                 f"of class {class_.suffix} which expects values of this type!"
             )
-            logging.warning(msg)
             warnings.warn(msg, stacklevel=2)
             return []
 
@@ -354,7 +349,6 @@ def _generate_triples_per_class(
             )
 
         else:
-            logging.error(f"Property type {property_.value_type} not supported!")
             raise ValueError(f"Property type {property_.value_type} not supported!")
 
     return triples
@@ -93,7 +93,7 @@ def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply |
             yield from issues
             tracker.issue(issues)
             class_name = self.class_by_view_id.get(view.as_id(), view.external_id)
-            triples = self.graph_store.queries.triples_of_type_instances(class_name)
+            triples = self.graph_store.read(class_name)
             for identifier, properties in _triples2dictionary(triples).items():
                 try:
                     yield self._create_node(identifier, properties, pydantic_cls, view_id)

@@ -0,0 +1,3 @@
+from ._base import Queries
+
+__all__ = ["Queries"]
@@ -0,0 +1,99 @@
+import warnings
+from typing import cast
+
+from rdflib import RDF, Graph, URIRef
+from rdflib.query import ResultRow
+
+from cognite.neat.rules.models.entities import ClassEntity
+from cognite.neat.rules.models.information import InformationRules
+from cognite.neat.utils.utils import remove_namespace
+
+from ._construct import build_construct_query
+
+
+class Queries:
+    """Helper class for storing standard queries for the graph store."""
+
+    def __init__(self, graph: Graph, rules: InformationRules | None = None):
+        self.graph = graph
+        self.rules = rules
+
+    def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
+        """Get instance IDs for a given class
+
+        Args:
+            class_uri: Class for which instances are to be found
+            limit: Max number of instances to return, by default -1 meaning all instances
+
+        Returns:
+            List of class instance URIs
+        """
+        query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
+            "class", class_uri
+        ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
+        return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
+
+    def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
+        """Get all triples for instances of a given class
+
+        Args:
+            class_uri: Class for which instances are to be found
+
+        Returns:
+            List of triples for instances of the given class
+        """
+        query = (
+            f"SELECT ?instance ?prop ?value "
+            f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
+        )
+
+        # Select queries give an iterable of result rows
+        return cast(list[ResultRow], list(self.graph.query(query)))
+
+    def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]:
+        """Get all triples of a given type.
+
+        This method assumes the graph has been transformed into the default namespace.
+        """
+
+        if self.rules:
+            query = (
+                f"SELECT ?instance ?prop ?value "
+                f"WHERE {{ ?instance a <{self.rules.metadata.namespace[rdf_type]}> . ?instance ?prop ?value . }} "
+                "order by ?instance"
+            )
+
+            result = self.graph.query(query)
+
+            # We cannot include the RDF.type in case there is a neat:type property
+            return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type]  # type: ignore[misc, index]
+        else:
+            warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
+            return []
+
+    def construct_instances_of_class(self, class_: str, properties_optional: bool = True) -> list[tuple[str, str, str]]:
+        """CONSTRUCT instances for a given class from the graph store
+
+        Args:
+            class_: Suffix of the class (combined with the rules' prefix) for which the query is generated
+            properties_optional: Whether to make all properties optional, default True
+
+        Returns:
+            List of triples for instances of the given class
+        """
+
+        if self.rules and (
+            query := build_construct_query(
+                ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_),
+                self.graph,
+                self.rules,
+                properties_optional,
+            )
+        ):
+            result = self.graph.query(query)
+
+            # We cannot include the RDF.type in case there is a neat:type property
+            return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type]  # type: ignore[misc, index]
+        else:
+            warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
+            return []
@@ -0,0 +1,185 @@
+import re
+from typing import cast
+
+from rdflib import Graph, URIRef
+
+from cognite.neat.rules.analysis import InformationArchitectRulesAnalysis
+from cognite.neat.rules.models._rdfpath import (
+    AllReferences,
+    Hop,
+    RDFPath,
+    SingleProperty,
+    Traversal,
+)
+from cognite.neat.rules.models.entities import ClassEntity
+from cognite.neat.rules.models.information import InformationProperty, InformationRules
+from cognite.neat.utils.utils import most_occurring_element
+
+from ._shared import Triple, hop2property_path
+
+_QUERY_TEMPLATE = """CONSTRUCT {{ {graph_template} }}
+                     WHERE {{     {graph_pattern}
+                                  {filter}
+                     }}"""
+
+
+def build_construct_query(
+    class_: ClassEntity,
+    graph: Graph,
+    rules: InformationRules,
+    properties_optional: bool = True,
+    class_instances: list[URIRef] | None = None,
+) -> str | None:
+    """Builds a CONSTRUCT query for a given class and rules and optionally filters by class instances.
+
+    Args:
+        class_ : The class entity for which the query is generated.
+        graph : The graph containing instances of classes.
+        rules : The information rules to use for query generation.
+        properties_optional : Whether to make all properties optional. Defaults to True.
+        class_instances : List of class instances to filter by. Defaults to None (no filter, return all instances).
+
+    Returns:
+        str: CONSTRUCT query.
+
+    !!! note "On CONSTRUCT Query"
+        CONSTRUCT query is composed of two parts: graph template and graph pattern.
+        Graph template is used to define the shape of instances acquired using the graph pattern.
+        This allows us to create a new graph with the new shape without actually modifying
+        the original graph, or creating new instances.
+
+        The CONSTRUCT query is far less forgiving than the SELECT query. It will not return
+        anything if one of the properties that define the "shape" of the class instance is missing.
+        This is the reason why there is an option to make all properties optional, so that
+        the query will return all instances that have at least one property defined.
+    """
+    if (
+        transformations := InformationArchitectRulesAnalysis(rules)
+        .class_property_pairs(only_rdfpath=True, consider_inheritance=True)
+        .get(class_, None)
+    ):
+        templates, patterns = to_construct_triples(
+            graph, list(transformations.values()), rules.prefixes, properties_optional
+        )
+
+        return _QUERY_TEMPLATE.format(
+            graph_template="\n".join(triples2sparql_statement(templates)),
+            graph_pattern="\n".join(triples2sparql_statement(patterns)),
+            filter="" if not class_instances else add_filter(class_instances),
+        )
+
+    else:
+        return None
+
+
+def add_filter(class_instances: list[URIRef]):
+    class_instances_formatted = [f"<{instance}>" for instance in class_instances]
+    return f"FILTER (?instance IN ({', '.join(class_instances_formatted)}))"
+
+
+def to_construct_triples(
+    graph: Graph, transformations: list[InformationProperty], prefixes: dict, properties_optional: bool = True
+) -> tuple[list[Triple], list[Triple]]:
+    """Converts transformations of a class to CONSTRUCT triples which are used to generate CONSTRUCT query
+
+    Args:
+        graph: Graph containing instances of classes (used for property inference for hops)
+        transformations : List of transformations to use to form triples
+        prefixes : Dictionary of prefixes for namespaces
+        properties_optional : Flag indicating if properties should be optional. Defaults to True.
+
+    Returns:
+        tuple: Tuple of triples that define graph template and graph pattern parts of CONSTRUCT query
+
+
+    !!! note "Purely inherited transformations"
+        The assumption neat makes is that, in the case of purely inherited transformations,
+        the instance is typed with the class to which the transformation belongs.
+
+        Otherwise the instance is typed with the class that occurs most often in non-inherited
+        transformations.
+
+    """
+    # TODO: Add handling of UNIONs in rules
+
+    templates = []
+    patterns = []
+    non_inherited_starting_rdf_types = []
+
+    for transformation in transformations:
+        traversal = cast(RDFPath, transformation.transformation).traversal
+
+        # keeping track of starting rdf types of non-inherited transformations/properties
+        if isinstance(traversal, Traversal) and not transformation.inherited:
+            non_inherited_starting_rdf_types.append(traversal.class_.id)
+
+        graph_template_triple = Triple(
+            subject="?instance",
+            predicate=f"{transformation.class_.prefix}:{transformation.property_}",
+            object=f'?{re.sub(r"[^_a-zA-Z0-9/_]", "_", str(transformation.property_).lower())}',
+            optional=False,
+        )
+        templates.append(graph_template_triple)
+
+        # use case AllReferences: binding instance to certain rdf property
+        if isinstance(traversal, AllReferences):
+            graph_pattern_triple = Triple(
+                subject="BIND(?instance", predicate="AS", object=f"{graph_template_triple.object})", optional=False
+            )
+
+        # use case SingleProperty: simple property traversal
+        elif isinstance(traversal, SingleProperty):
+            graph_pattern_triple = Triple(
+                subject=graph_template_triple.subject,
+                predicate=traversal.property.id,
+                object=graph_template_triple.object,
+                optional=True if properties_optional else not transformation.is_mandatory,
+            )
+
+        # use case Hop: property traversal with multiple hops turned into property path
+        # see: https://www.oxfordsemantic.tech/faqs/what-is-a-property-path
+        elif isinstance(traversal, Hop):
+            graph_pattern_triple = Triple(
+                subject="?instance",
+                predicate=hop2property_path(graph, traversal, prefixes),
+                object=graph_template_triple.object,
+                optional=True if properties_optional else not transformation.is_mandatory,
+            )
+
+        # other types of rdfpaths are skipped (their template triple was already appended above)
+        else:
+            continue
+
+        patterns.append(graph_pattern_triple)
+
+    # Add the first triple of the graph pattern, stating the type of the instance.
+    # We pick the most frequently occurring rdf type among the starting nodes of the
+    # non-inherited transformations, or fall back to the class for which the (last)
+    # transformation is defined.
+    # This is a safeguard in case there are multiple classes in the graph pattern.
+    patterns.insert(
+        0,
+        Triple(
+            subject="?instance",
+            predicate="a",
+            object=(
+                most_occurring_element(non_inherited_starting_rdf_types)
+                if non_inherited_starting_rdf_types
+                else str(transformation.class_)
+            ),
+            optional=False,
+        ),
+    )
+
+    return templates, patterns
+
+
+def triples2sparql_statement(triples: list[Triple]):
+    return [
+        (
+            f"OPTIONAL {{ {triple.subject} {triple.predicate} {triple.object} . }}"
+            if triple.optional
+            else f"{triple.subject} {triple.predicate} {triple.object} ."
+        )
+        for triple in triples
+    ]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from ._base import Queries

		__all__ = ["Queries"]