Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
eclipse-rdf4j/rdf4j#1298 initial implementation of a rdfs reasoner fo…
Browse files Browse the repository at this point in the history
…r the shacl engine

Signed-off-by: Håvard Ottestad <hmottestad@gmail.com>
  • Loading branch information
hmottestad committed Feb 19, 2019
1 parent 11e0d51 commit 5f32fc6
Show file tree
Hide file tree
Showing 30 changed files with 698 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS
SailConnection addedStatements = shaclSailConnection.getAddedStatements();

if (overrideTargetNode != null) {
PlanNode bulkedEternalLeftOuter = new LoggingNode(new BulkedExternalLeftOuterJoin(overrideTargetNode, shaclSailConnection, path.getQuery("?a", "?c"), false), "");
PlanNode bulkedEternalLeftOuter = new LoggingNode(new BulkedExternalLeftOuterJoin(overrideTargetNode, shaclSailConnection, path.getQuery("?a", "?c", null), false), "");
// filter by type against addedStatements, this is an optimization for when you add the type statement in the same transaction
PlanNode addedStatementsTypeFilter = new LoggingNode(new ExternalTypeFilterNode(addedStatements, classResource, bulkedEternalLeftOuter, 1, false), "");

Expand Down Expand Up @@ -100,7 +100,7 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS
}

// also add anything that matches the path from the previousConnection, eg. if you add ":peter a foaf:Person", and ":peter foaf:knows :steve" is already added
PlanNode bulkedExternalLeftOuter = new LoggingNode(new BulkedExternalLeftOuterJoin(bufferedAddedByShape.getPlanNode(), shaclSailConnection, path.getQuery("?a", "?c"), true), "");
PlanNode bulkedExternalLeftOuter = new LoggingNode(new BulkedExternalLeftOuterJoin(bufferedAddedByShape.getPlanNode(), shaclSailConnection, path.getQuery("?a", "?c", null), true), "");

// only get tuples that came from the first or the leftOuterJoin or bulkedExternalLeftOuter,
// we don't care if you added ":peter a foaf:Person" and nothing else and there is nothing else in the underlying sail
Expand All @@ -116,12 +116,12 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS
if(shaclSailConnection.stats.hasRemoved()) {

// Handle when a type statement has been removed, first get all removed type statements that match the classResource for this shape
PlanNode removedTypeStatements = new LoggingNode(new Select(shaclSailConnection.getRemovedStatements(), "?a a <" + classResource + ">"), "");
PlanNode removedTypeStatements = new LoggingNode(new Select(shaclSailConnection.getRemovedStatements(), "?a a <" + classResource + ">"), "removedTypeStatements");

// Build a query to run against the base sail. eg:
// ?c foaf:knows ?a.
// ?c a foaf:Person.
String query = path.getQuery("?c", "?a") + nodeShape.getQuery("?c", "?q");
String query = path.getQuery("?c", "?a", null) + nodeShape.getQuery("?c", "?q", shaclSailConnection.getRdfsSubClassOfReasoner());

// do bulked external join for the removed class statements against the query above.
// Essentially gets data that is now invalid because of the removed type statement
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public String toString() {
public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeShape, boolean printPlans, PlanNode overrideTargetNode) {

if(overrideTargetNode != null){
PlanNode bulkedExternalLeftOuterJoin = new LoggingNode(new BulkedExternalLeftOuterJoin(overrideTargetNode, shaclSailConnection, path.getQuery("?a", "?c"), false), "");
PlanNode bulkedExternalLeftOuterJoin = new LoggingNode(new BulkedExternalLeftOuterJoin(overrideTargetNode, shaclSailConnection, path.getQuery("?a", "?c", null), false), "");
PlanNode groupByCount = new LoggingNode(new GroupByCount(bulkedExternalLeftOuterJoin), "");

DirectTupleFromFilter directTupleFromFilter = new DirectTupleFromFilter();
Expand Down Expand Up @@ -92,7 +92,7 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS

PlanNode unique = new LoggingNode(new Unique(trimmed), "");

PlanNode bulkedExternalLeftOuterJoin = new LoggingNode(new BulkedExternalLeftOuterJoin(unique, shaclSailConnection, path.getQuery("?a", "?c"), false), "");
PlanNode bulkedExternalLeftOuterJoin = new LoggingNode(new BulkedExternalLeftOuterJoin(unique, shaclSailConnection, path.getQuery("?a", "?c", null), false), "");

PlanNode groupByCount = new LoggingNode(new GroupByCount(bulkedExternalLeftOuterJoin), "");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public String toString() {
public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeShape, boolean printPlans, PlanNode overrideTargetNode) {

if(overrideTargetNode != null){
PlanNode allStatements = new LoggingNode(new BulkedExternalLeftOuterJoin(overrideTargetNode, shaclSailConnection, path.getQuery("?a", "?c"), false), "");
PlanNode allStatements = new LoggingNode(new BulkedExternalLeftOuterJoin(overrideTargetNode, shaclSailConnection, path.getQuery("?a", "?c", null), false), "");
PlanNode groupBy = new LoggingNode(new GroupByCount(allStatements), "");

DirectTupleFromFilter filteredStatements = new DirectTupleFromFilter();
Expand Down Expand Up @@ -106,7 +106,7 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS

PlanNode planAddedForShape = new LoggingNode(nodeShape.getPlanAddedStatements(shaclSailConnection, nodeShape), "");

PlanNode select = new LoggingNode(shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), path.getQuery("?a", "?c"))), "");
PlanNode select = new LoggingNode(shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), path.getQuery("?a", "?c", null))), "");


if (nodeShape instanceof TargetClass) {
Expand All @@ -126,7 +126,7 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS

PlanNode trimTuple = new LoggingNode(new TrimTuple(minCountFilter, 0, 1), "");

PlanNode bulkedExternalLeftOuterJoin2 = new LoggingNode(new BulkedExternalLeftOuterJoin(trimTuple, shaclSailConnection, path.getQuery("?a", "?c"), false), "");
PlanNode bulkedExternalLeftOuterJoin2 = new LoggingNode(new BulkedExternalLeftOuterJoin(trimTuple, shaclSailConnection, path.getQuery("?a", "?c", null), false), "");

PlanNode groupBy2 = new LoggingNode(new GroupByCount(bulkedExternalLeftOuterJoin2), "");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.eclipse.rdf4j.model.vocabulary.SHACL;
import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
import org.eclipse.rdf4j.sail.SailConnection;
import org.eclipse.rdf4j.sail.shacl.RdfsSubClassOfReasoner;
import org.eclipse.rdf4j.sail.shacl.ShaclSail;
import org.eclipse.rdf4j.sail.shacl.ShaclSailConnection;
import org.eclipse.rdf4j.sail.shacl.planNodes.LoggingNode;
Expand Down Expand Up @@ -50,13 +51,13 @@ public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeS

@Override
public PlanNode getPlanAddedStatements(ShaclSailConnection shaclSailConnection, NodeShape nodeShape) {
PlanNode node = shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), getQuery("?a", "?c")));
PlanNode node = shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), getQuery("?a", "?c", null)));
return new TrimTuple(new LoggingNode(node, ""), 0, 1);
}

@Override
public PlanNode getPlanRemovedStatements(ShaclSailConnection shaclSailConnection, NodeShape nodeShape) {
PlanNode node = shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getRemovedStatements(), getQuery("?a", "?c")));
PlanNode node = shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getRemovedStatements(), getQuery("?a", "?c", null)));
return new TrimTuple(new LoggingNode(node, ""), 0, 1);
}

Expand All @@ -78,7 +79,7 @@ public boolean requiresEvaluation(SailConnection addedStatements, SailConnection
}

@Override
public String getQuery(String subjectVariable, String objectVariable) {
public String getQuery(String subjectVariable, String objectVariable, RdfsSubClassOfReasoner rdfsSubClassOfReasoner) {
return subjectVariable+" ?b "+objectVariable;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,18 @@ public class PathPropertyShape extends PropertyShape {

@Override
public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeShape, boolean printPlans, PlanNode overrideTargetNode) {
return shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection, path.getQuery("?a", "?c")));
return shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection, path.getQuery("?a", "?c", null)));
}

@Override
public PlanNode getPlanAddedStatements(ShaclSailConnection shaclSailConnection, NodeShape nodeShape) {
return shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), path.getQuery("?a", "?c")));
return shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), path.getQuery("?a", "?c", null)));

}

@Override
public PlanNode getPlanRemovedStatements(ShaclSailConnection shaclSailConnection, NodeShape nodeShape) {
return shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getRemovedStatements(), path.getQuery("?a", "?c")));
return shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getRemovedStatements(), path.getQuery("?a", "?c", null)));

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@

package org.eclipse.rdf4j.sail.shacl.AST;

import org.eclipse.rdf4j.sail.shacl.RdfsSubClassOfReasoner;

/**
* Contract for AST elements (shapes and paths) that can render themselves as a query fragment
* relating a subject variable to an object variable. The implementations visible alongside this
* interface emit SPARQL-style triple patterns such as {@code ?a <path> ?c .}.
*
* @author Håvard Ottestad
*/
public interface QueryGenerator {
// Two-argument form: builds the fragment that links subjectVariable to objectVariable.
// Both arguments are variable names including the leading '?', e.g. "?a" and "?c".
String getQuery(String subjectVariable, String objectVariable);
// Three-argument form: same fragment, optionally widened by a subclass reasoner.
// Callers pass null for rdfsSubClassOfReasoner when no rdfs:subClassOf expansion is wanted;
// implementations that do not deal with classes appear to ignore the argument entirely.
String getQuery(String subjectVariable, String objectVariable, RdfsSubClassOfReasoner rdfsSubClassOfReasoner);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.eclipse.rdf4j.model.vocabulary.SHACL;
import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
import org.eclipse.rdf4j.sail.SailConnection;
import org.eclipse.rdf4j.sail.shacl.RdfsSubClassOfReasoner;

import java.util.Objects;
import java.util.stream.Stream;
Expand Down Expand Up @@ -51,7 +52,7 @@ public boolean requiresEvaluation(SailConnection addedStatements, SailConnection
}

@Override
public String getQuery(String subjectVariable, String objectVariable) {
public String getQuery(String subjectVariable, String objectVariable, RdfsSubClassOfReasoner rdfsSubClassOfReasoner) {

return subjectVariable+" <" + path + "> "+objectVariable+" . \n";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ interface FilterAttacher {

static public PlanNode getGenericSingleObjectPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeShape, FilterAttacher filterAttacher, PathPropertyShape pathPropertyShape, PlanNode overrideTargetNode) {
if (overrideTargetNode != null) {
PlanNode bulkedExternalInnerJoin = new LoggingNode(new BulkedExternalInnerJoin(overrideTargetNode, shaclSailConnection, pathPropertyShape.path.getQuery("?a", "?c"), false), "");
PlanNode bulkedExternalInnerJoin = new LoggingNode(new BulkedExternalInnerJoin(overrideTargetNode, shaclSailConnection, pathPropertyShape.path.getQuery("?a", "?c", null), false), "");

DirectTupleFromFilter invalidValues = new DirectTupleFromFilter();
filterAttacher.attachFilter(bulkedExternalInnerJoin, null, new PushBasedLoggingNode(invalidValues));
Expand All @@ -33,7 +33,7 @@ static public PlanNode getGenericSingleObjectPlan(ShaclSailConnection shaclSailC

BufferedSplitter bufferedSplitter = new BufferedSplitter(addedByShape);

PlanNode addedByPath = new LoggingNode(shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), pathPropertyShape.path.getQuery("?a", "?c"))), "");
PlanNode addedByPath = new LoggingNode(shaclSailConnection.getCachedNodeFor(new Select(shaclSailConnection.getAddedStatements(), pathPropertyShape.path.getQuery("?a", "?c", null))), "");

// this is essentially pushing the filter down below the join
DirectTupleFromFilter invalidValuesDirectOnPath = new DirectTupleFromFilter();
Expand All @@ -52,7 +52,7 @@ static public PlanNode getGenericSingleObjectPlan(ShaclSailConnection shaclSailC
top = new LoggingNode(new UnionNode(top, typeFilterPlan), "");
}

PlanNode bulkedExternalInnerJoin = new LoggingNode(new BulkedExternalInnerJoin(bufferedSplitter.getPlanNode(), shaclSailConnection, pathPropertyShape.path.getQuery("?a", "?c"), true), "");
PlanNode bulkedExternalInnerJoin = new LoggingNode(new BulkedExternalInnerJoin(bufferedSplitter.getPlanNode(), shaclSailConnection, pathPropertyShape.path.getQuery("?a", "?c", null), true), "");

top = new LoggingNode(new UnionNode(top, bulkedExternalInnerJoin), "");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
import org.eclipse.rdf4j.sail.NotifyingSailConnection;
import org.eclipse.rdf4j.sail.SailConnection;
import org.eclipse.rdf4j.sail.shacl.RdfsSubClassOfReasoner;
import org.eclipse.rdf4j.sail.shacl.ShaclSailConnection;
import org.eclipse.rdf4j.sail.shacl.planNodes.ExternalTypeFilterNode;
import org.eclipse.rdf4j.sail.shacl.planNodes.LoggingNode;
import org.eclipse.rdf4j.sail.shacl.planNodes.PlanNode;
import org.eclipse.rdf4j.sail.shacl.planNodes.Select;
import org.eclipse.rdf4j.sail.shacl.planNodes.TrimTuple;

import java.util.Set;
import java.util.stream.Stream;

/**
Expand All @@ -45,18 +47,18 @@ public class TargetClass extends NodeShape {

@Override
public PlanNode getPlan(ShaclSailConnection shaclSailConnection, NodeShape nodeShape, boolean printPlans, PlanNode overrideTargetNode) {
return new TrimTuple(new LoggingNode(new Select(shaclSailConnection, getQuery("?a", "?c")), ""), 0, 1);
return new TrimTuple(new LoggingNode(new Select(shaclSailConnection, getQuery("?a", "?c", shaclSailConnection.getRdfsSubClassOfReasoner())), ""), 0, 1);
}

@Override
public PlanNode getPlanAddedStatements(ShaclSailConnection shaclSailConnection, NodeShape nodeShape) {
return new TrimTuple(new LoggingNode(new Select(shaclSailConnection.getAddedStatements(), getQuery("?a", "?c")), ""), 0, 1);
return new TrimTuple(new LoggingNode(new Select(shaclSailConnection.getAddedStatements(), getQuery("?a", "?c", null)), ""), 0, 1);

}

@Override
public PlanNode getPlanRemovedStatements(ShaclSailConnection shaclSailConnection, NodeShape nodeShape) {
return new Select(shaclSailConnection.getRemovedStatements(), getQuery("?a", "?c"));
return new Select(shaclSailConnection.getRemovedStatements(), getQuery("?a", "?c", null));
}

@Override
Expand All @@ -65,7 +67,18 @@ public boolean requiresEvaluation(SailConnection addedStatements, SailConnection
}

@Override
public String getQuery(String subjectVariable, String objectVariable) {
public String getQuery(String subjectVariable, String objectVariable, RdfsSubClassOfReasoner rdfsSubClassOfReasoner) {
if(rdfsSubClassOfReasoner != null ){
Set<Resource> resources = rdfsSubClassOfReasoner.backwardsChain(targetClass);
if(resources.size() > 1){
return resources
.stream()
.map(r -> "{ BIND(rdf:type as ?b1) \n BIND(<" + r + "> as "+objectVariable+") \n "+subjectVariable+" ?b1 "+objectVariable+". } \n")
.reduce((l,r)-> l+ " UNION "+r)
.get();
}
}

return "BIND(rdf:type as ?b1) \n BIND(<" + targetClass + "> as "+objectVariable+") \n "+subjectVariable+" ?b1 "+objectVariable+". \n";
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package org.eclipse.rdf4j.sail.shacl;

import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;

/**
 * Minimal RDFS reasoner covering only {@code rdfs:subClassOf}.
 *
 * <p>
 * The reasoner is populated from the {@code rdfs:subClassOf} statements of a connection (see
 * {@link #createReasoner(ShaclSailConnection)}) and pre-computes the reflexive, transitive closure of
 * the class hierarchy in both directions:
 * <ul>
 * <li>forward: class -&gt; itself and all of its super-classes</li>
 * <li>backwards: class -&gt; itself and all of its sub-classes</li>
 * </ul>
 *
 * <p>
 * Instances are not synchronized; the caches are filled once by {@code createReasoner} and only read
 * afterwards.
 */
public class RdfsSubClassOfReasoner {

    private final Collection<Statement> subClassOfStatements = new ArrayList<>();

    // A set rather than a list: a class usually occurs in many subClassOf statements and only
    // needs to be seeded into the caches once.
    private final Collection<Resource> types = new HashSet<>();

    private final Map<Resource, Set<Resource>> forwardChainCache = new HashMap<>();
    private final Map<Resource, Set<Resource>> backwardsChainCache = new HashMap<>();

    /**
     * Expands an {@code rdf:type} statement into one statement per known super-class of its object.
     *
     * @param statement the statement to expand
     * @return for an {@code rdf:type} statement whose object is a known class: one statement for the
     *         class itself and one for each of its (transitive) super-classes, keeping the subject and
     *         context of the input; for any other statement: a stream containing just that statement
     */
    public Stream<Statement> forwardChain(Statement statement) {
        if (!statement.getPredicate().equals(RDF.TYPE)) {
            return Stream.of(statement);
        }

        Set<Resource> superClasses = forwardChainCache.get(statement.getObject());
        if (superClasses == null) {
            // The object is not part of any subClassOf statement, so there is nothing to infer.
            return Stream.of(statement);
        }

        SimpleValueFactory vf = SimpleValueFactory.getInstance();
        return superClasses.stream()
                .map(r -> vf.createStatement(statement.getSubject(), RDF.TYPE, r, statement.getContext()));
    }

    /**
     * Looks up the sub-classes of a class.
     *
     * @param type the class to look up
     * @return a read-only view of {@code type} together with all of its (transitive) sub-classes, or
     *         an empty set when {@code type} does not occur in any {@code rdfs:subClassOf} statement
     */
    public Set<Resource> backwardsChain(Resource type) {
        Set<Resource> subClasses = backwardsChainCache.get(type);
        // Never hand out the cached set itself: a caller modifying it would corrupt the cache.
        return subClasses != null ? Collections.unmodifiableSet(subClasses) : Collections.emptySet();
    }

    /**
     * Registers one {@code rdfs:subClassOf} statement. Statements whose object is not a
     * {@link Resource} (e.g. a literal) cannot describe a class hierarchy and are ignored instead of
     * failing with a {@link ClassCastException}.
     *
     * @param st the statement to register
     */
    void addSubClassOfStatement(Statement st) {
        if (!(st.getObject() instanceof Resource)) {
            return;
        }

        subClassOfStatements.add(st);
        types.add(st.getSubject());
        types.add((Resource) st.getObject());
    }

    /**
     * Fills both caches: seeds every known class with itself, adds the direct sub-/super-class links
     * and finally closes both relations transitively.
     *
     * @param subClassOfStatements the registered {@code rdfs:subClassOf} statements
     */
    private void calculateSubClassOf(Collection<Statement> subClassOfStatements) {
        // Reflexive part: every class is a sub- and super-class of itself.
        for (Resource type : types) {
            forwardChainCache.computeIfAbsent(type, k -> new HashSet<>()).add(type);
            backwardsChainCache.computeIfAbsent(type, k -> new HashSet<>()).add(type);
        }

        // Direct links taken straight from the statements.
        for (Statement s : subClassOfStatements) {
            Resource subClass = s.getSubject();
            Resource supClass = (Resource) s.getObject();

            forwardChainCache.computeIfAbsent(subClass, k -> new HashSet<>()).add(supClass);
            backwardsChainCache.computeIfAbsent(supClass, k -> new HashSet<>()).add(subClass);
        }

        forwardChainUntilFixPoint(forwardChainCache);
        forwardChainUntilFixPoint(backwardsChainCache);
    }

    /**
     * Computes the transitive closure of the given relation in place: repeatedly adds, for every
     * entry, everything reachable through its current members until a full pass adds nothing new.
     *
     * @param cache the relation to close; its value sets are extended in place
     */
    private void forwardChainUntilFixPoint(Map<Resource, Set<Resource>> cache) {
        boolean changed = true;
        while (changed) {
            changed = false;

            for (Set<Resource> reachable : cache.values()) {
                // Collect first, add afterwards: "reachable" must not be modified while iterating it.
                List<Resource> additions = new ArrayList<>();
                for (Resource member : reachable) {
                    additions.addAll(resolveTypes(member, cache));
                }

                if (reachable.addAll(additions)) {
                    changed = true;
                }
            }
        }
    }

    /**
     * Null-safe lookup in one of the caches.
     *
     * @return the set stored for {@code value}, or an empty set when there is none
     */
    private Set<Resource> resolveTypes(Resource value, Map<Resource, Set<Resource>> cache) {
        Set<Resource> resolved = cache.get(value);
        return resolved != null ? resolved : Collections.emptySet();
    }

    /**
     * Builds a reasoner from all explicit {@code rdfs:subClassOf} statements visible through the
     * given connection.
     *
     * @param shaclSailConnection the connection to read the class hierarchy from
     * @return a fully initialised reasoner
     */
    static RdfsSubClassOfReasoner createReasoner(ShaclSailConnection shaclSailConnection) {
        RdfsSubClassOfReasoner rdfsSubClassOfReasoner = new RdfsSubClassOfReasoner();

        // The stream wraps the connection's iteration; try-with-resources makes sure it is closed.
        try (Stream<? extends Statement> stream = Iterations.stream(shaclSailConnection.getStatements(null, RDFS.SUBCLASSOF, null, false))) {
            stream.forEach(rdfsSubClassOfReasoner::addSubClassOfStatement);
        }

        rdfsSubClassOfReasoner.calculateSubClassOf(rdfsSubClassOfReasoner.subClassOfStatements);
        return rdfsSubClassOfReasoner;
    }
}

/*
*/
Loading

0 comments on commit 5f32fc6

Please sign in to comment.