Skip to content

Commit

Permalink
Reduce allocation, work with snapshots.
Browse files Browse the repository at this point in the history
  • Loading branch information
JervenBolleman committed Mar 20, 2024
1 parent 087f1a6 commit b203bf6
Show file tree
Hide file tree
Showing 11 changed files with 330 additions and 215 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>io.github.jervenbolleman</groupId>
<artifactId>sapfhir</artifactId>
<version>0.2-M2</version>
<version>0.2-SNAPSHOT</version>
<packaging>jar</packaging>
<licenses>
<license>
Expand Down Expand Up @@ -179,7 +179,7 @@
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>handlegraph4j</artifactId>
<version>1.2</version>
<version>1.3-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,30 +49,28 @@
import swiss.sib.swissprot.sapfhir.values.HandleGraphValueFactory;

/**
* Generate the triples for the different possible values htat can be asked for.
* Generate the triples for the different possible values that can be asked for.
*
* @author <a href="mailto:jerven.bolleman@sib.swiss">Jerven Bolleman</a>
* @param <P> the type of PathHandle
* @param <S> the type of StepHandle
* @param <E> the type of EdgeHandle
* @param <N> the type of NodeHandle
* @param vf value factory to create new literals
* @param statementProviders used to create statements from handlegraphs
*/
public class PathHandleGraphTripleSource<P extends PathHandle, S extends StepHandle, N extends NodeHandle, E extends EdgeHandle<N>>
implements TripleSource {

private final HandleGraphValueFactory<P, S, N, E> vf;
private final List<StatementProvider> statementProviders;
record PathHandleGraphTripleSource<P extends PathHandle, S extends StepHandle, N extends NodeHandle, E extends EdgeHandle<N>>(
HandleGraphValueFactory<P, S, N, E> vf, List<StatementProvider> statementProviders) implements TripleSource {

/**
* A triple source for a certain sail
*
* @param sail the handlegraph backed sail we extract data from
*/
public PathHandleGraphTripleSource(PathHandleGraphSail<P, S, N, E> sail) {
this.vf = sail.getValueFactory();
this.statementProviders = List.of(new StepPositionStatementProvider<>(sail),
new NodeRelatedStatementProvider<>(sail), new StepRelatedStatementProvider<>(sail),
new PathRelatedStatementProvider<>(sail));
this(sail.getValueFactory(),
List.of(new StepPositionStatementProvider<>(sail), new NodeRelatedStatementProvider<>(sail),
new StepRelatedStatementProvider<>(sail), new PathRelatedStatementProvider<>(sail)));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,11 @@
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.empty;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.filter;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.flatMap;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.from;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.map;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.of;
import static swiss.sib.swissprot.sapfhir.statements.StatementProvider.nodeIriFromIRI;
import static swiss.sib.swissprot.sapfhir.statements.StatementProvider.nodeIriFromIri;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
Expand Down Expand Up @@ -88,8 +86,7 @@ public boolean objectMightReturnValues(Value val) {
return true;
} else if (VG.Node.equals(val)) {
return true;
} else if (val instanceof Literal) {
Literal lit = (Literal) val;
} else if (val instanceof Literal lit) {
return (XSD.STRING.equals(lit.getDatatype()));
}
return false;
Expand All @@ -98,28 +95,28 @@ public boolean objectMightReturnValues(Value val) {
@Override
public AutoClosedIterator<Statement> getStatements(Resource subject, IRI predicate, Value object) {
if (subject instanceof BNode) {
return AutoClosedIterator.empty();
return empty();
}
NodeIRI<N> nodeSubject = nodeIriFromIRI((IRI) subject, sail);
NodeIRI<N> nodeSubject = nodeIriFromIri((IRI) subject, sail);
if (nodeSubject != null) {
return generateTriplesForKnownNode(nodeSubject, predicate, object);
} else if (subject == null && object == null) {
return generateTriplesForAllNodes(predicate);
} else if (object instanceof Literal && (predicate == null || RDF.VALUE.equals(predicate))) {
return getNodeTriplesForKnownSequence(object, RDF.VALUE);
} else if (object instanceof IRI) {
NodeIRI<N> nodeObject = nodeIriFromIRI((IRI) object, sail);
} else if (object instanceof Literal lit && (predicate == null || RDF.VALUE.equals(predicate))) {
return getNodeTriplesForKnownSequence(lit, RDF.VALUE);
} else if (object instanceof IRI iri) {
NodeIRI<N> nodeObject = nodeIriFromIri(iri, sail);
if (nodeObject != null) {
sail.pathGraph().followEdgesToWardsTheRight(nodeObject.node());
} else if (VG.Node.equals(object)) {
} else if (VG.Node.equals(iri)) {
AutoClosedIterator<N> nodes = sail.pathGraph().nodes();
return map(nodes, (n) -> {
var ni = new NodeIRI<>(n.id(), sail);
return new UnsafeStatement(ni, RDF.TYPE, VG.Node);
});
}
}
return AutoClosedIterator.empty();
return empty();
}

private AutoClosedIterator<Statement> generateTriplesForAllNodes(IRI predicate) {
Expand Down Expand Up @@ -147,23 +144,23 @@ private AutoClosedIterator<Statement> generateTriplesForKnownNode(NodeIRI<N> nod
var typeValue = nodeToTriples(node, predicate, object);

if ((predicate == null || linkPredicates.contains(predicate)) && ((object instanceof IRI) || object == null)) {
NodeIRI<N> nodeObject = nodeIriFromIRI((IRI) object, sail);
NodeIRI<N> nodeObject = nodeIriFromIri((IRI) object, sail);
var linksForNode = linksForNode(node, predicate, nodeObject);
var typesAndLinks = concat(typeValue, linksForNode);
return typesAndLinks;
}
return typeValue;
}

private AutoClosedIterator<Statement> getNodeTriplesForKnownSequence(Value object, IRI predicate) {
Literal lit = (Literal) object;
private AutoClosedIterator<Statement> getNodeTriplesForKnownSequence(Literal lit, IRI predicate) {

if ((lit.getDatatype() == null || lit.getDatatype() == XSD.STRING) && lit.getLanguage().isEmpty()) {
String label = lit.getLabel();
if (Sequence.stringCanBeDNASequence(label)) {
byte[] bytes = label.getBytes(StandardCharsets.US_ASCII);
Sequence seq = SequenceType.fromByteArray(bytes);
var nodesWithSequence = sail.pathGraph().nodesWithSequence(seq);
Function<N, AutoClosedIterator<Statement>> name = n -> nodeToTriples(n, predicate, object);
Function<N, AutoClosedIterator<Statement>> name = n -> nodeToTriples(n, predicate, lit);
var map = map(nodesWithSequence, name);
return flatMap(map);
}
Expand All @@ -186,21 +183,20 @@ private AutoClosedIterator<Statement> nodeIriToTriples(IRI predicate, Value obje
Literal sequence = p.get();
if (object == null || sequence.equals(object)) {
return of(new UnsafeStatement(nodeSubject, RDF.VALUE, sequence));
} else
} else {
return empty();

}
} else if (VG.Node.equals(object)){
return of(nodeTypeStatement(nodeSubject));
}
Statement[] statements = new Statement[2];
Literal seq = p.get();
if (object == null) {
statements[0] = nodeTypeStatement(nodeSubject);
return of(nodeTypeStatement(nodeSubject), new UnsafeStatement(nodeSubject, RDF.VALUE, seq));
} else if (seq.equals(object)){
return of(new UnsafeStatement(nodeSubject, RDF.VALUE, seq));
} else {
return empty();
}
Literal sequence = p.get();
Statement nodeValueStatement = new UnsafeStatement(nodeSubject, RDF.VALUE, sequence);
statements[1] = nodeValueStatement;

var i = from(Arrays.asList(statements[0], statements[1]).iterator());
var f = filter(i, Objects::nonNull);
return StatementProvider.filter(object, f);
}

private AutoClosedIterator<Statement> nodeToTriples(N node, IRI predicate, Value object) {
Expand All @@ -215,86 +211,111 @@ private Statement nodeTypeStatement(NodeIRI<N> nodeSubject) {

private AutoClosedIterator<Statement> linksForNode(N node, IRI predicate, NodeIRI<N> object) {
PathGraph<P, S, N, E> pg = sail.pathGraph();
AutoClosedIterator<E> asStream = pg.followEdgesToWardsTheLeft(node);
AutoClosedIterator<E> leftStream = pg.followEdgesToWardsTheLeft(node);
if (object != null) {
Predicate<E> rightMatches = e -> pg.asLong(e.right()) == object.id();
asStream = filter(asStream, rightMatches);
leftStream = filter(leftStream, rightMatches);
}
return edgesToStatements(predicate, asStream);

// AutoClosedIterator<E> rightStream = pg.followEdgesToWardsTheRight(node);
// if (object != null) {
// Predicate<E> rightMatches = e -> pg.asLong(e.right()) == object.id();
// rightStream = filter(rightStream, rightMatches);
// }
return edgesToStatements(predicate, leftStream);
}

private AutoClosedIterator<Statement> edgesToStatements(IRI predicate, AutoClosedIterator<E> asStream) {
if (VG.linksForwardToForward.equals(predicate)) {
return map(asStream, this::forwardToForward);
return filter(map(asStream, this::forwardToForward), Objects::nonNull);
} else if (VG.linksForwardToReverse.equals(predicate)) {
return map(asStream, this::forwardToReverse);
return filter(map(asStream, this::forwardToReverse), Objects::nonNull);
} else if (VG.linksReverseToReverse.equals(predicate)) {
return map(asStream, this::reverseToReverse);
return filter(map(asStream, this::reverseToReverse), Objects::nonNull);
} else if (VG.linksReverseToForward.equals(predicate)) {
return map(asStream, this::reverseToForward);
return filter(map(asStream, this::reverseToForward), Objects::nonNull);
} else if (VG.links.equals(predicate)) {
return map(asStream, this::links);
} else {
var map = map(asStream, e -> {
var i = Arrays.asList(forwardToForward(e), forwardToReverse(e), reverseToReverse(e),
reverseToForward(e), links(e)).iterator();
return from(i);
});
return filter(flatMap(map), Objects::nonNull);
return flatMap(map(asStream, this::edgeToStatements));
}
}

private Statement links(E edge) {
return new UnsafeStatement(new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail),
VG.links, new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail));
NodeIRI<N> left = new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail);
NodeIRI<N> right = new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail);
return links(left, right);
}

/**
 * Builds the generic link statement between two node IRIs, using
 * {@code VG.links} as the predicate.
 *
 * @param left  the subject of the statement
 * @param right the object of the statement
 * @return a new {@code UnsafeStatement} of the form {@code left VG.links right}
 */
private Statement links(NodeIRI<N> left, NodeIRI<N> right) {
	return new UnsafeStatement(left, VG.links, right);
}

/**
 * Describes a single edge as two statements: the generic {@code VG.links}
 * statement and the one orientation-specific link statement that matches the
 * reverse flags of the edge's left and right node handles.
 *
 * @param edge the edge to turn into statements
 * @return the iterator produced by {@code of(...)} for the generic link
 *         statement followed by the orientation-specific one
 */
private AutoClosedIterator<Statement> edgeToStatements(E edge) {
	NodeIRI<N> left = new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail);
	NodeIRI<N> right = new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail);
	boolean leftIsReverse = sail.pathGraph().isReverseNodeHandle(edge.left());
	boolean rightIsReverse = sail.pathGraph().isReverseNodeHandle(edge.right());
	var genericLink = links(left, right);
	// The two reverse flags select exactly one of the four orientation
	// predicates, so only one extra statement is ever created per edge.
	var orientation = leftIsReverse
			? (rightIsReverse ? VG.linksReverseToReverse : VG.linksReverseToForward)
			: (rightIsReverse ? VG.linksForwardToReverse : VG.linksForwardToForward);
	return of(genericLink, new UnsafeStatement(left, orientation, right));
}

private Statement forwardToForward(E edge) {
if (!sail.pathGraph().isReverseNodeHandle(edge.left()) && !sail.pathGraph().isReverseNodeHandle(edge.right())) {
return new UnsafeStatement(
new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail), VG.linksForwardToForward,
new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail));
NodeIRI<N> left = new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail);
NodeIRI<N> right = new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail);
return new UnsafeStatement(left, VG.linksForwardToForward, right);
} else {
return null;
}
}

private Statement forwardToReverse(E edge) {
if (!sail.pathGraph().isReverseNodeHandle(edge.left()) && sail.pathGraph().isReverseNodeHandle(edge.right())) {
return new UnsafeStatement(
new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail), VG.linksForwardToReverse,
new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail));
NodeIRI<N> left = new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail);
NodeIRI<N> right = new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail);
return new UnsafeStatement(left, VG.linksForwardToReverse, right);
} else {
return null;
}
}

private Statement reverseToReverse(E edge) {
if (sail.pathGraph().isReverseNodeHandle(edge.left()) && sail.pathGraph().isReverseNodeHandle(edge.right())) {
return new UnsafeStatement(
new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail), VG.linksReverseToReverse,
new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail));
NodeIRI<N> left = new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail);
NodeIRI<N> right = new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail);
return new UnsafeStatement(left, VG.linksReverseToReverse, right);
} else {
return null;
}
}

private Statement reverseToForward(E edge) {
if (sail.pathGraph().isReverseNodeHandle(edge.left()) && !sail.pathGraph().isReverseNodeHandle(edge.right())) {
return new UnsafeStatement(
new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail), VG.linksReverseToForward,
new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail));
NodeIRI<N> left = new NodeIRI<>(sail.pathGraph().asLong(edge.left()), sail);
NodeIRI<N> right = new NodeIRI<>(sail.pathGraph().asLong(edge.right()), sail);
return new UnsafeStatement(left, VG.linksReverseToForward, right);
} else {
return null;
}
}



@Override
public double estimatePredicateCardinality(IRI predicate) {
if (predicate == null) {
return sail.pathGraph().nodeCount() + sail.pathGraph().edgeCount();
} else if (RDF.VALUE.equals(predicate)) {
return 10_000; // We really prefer to go linear over all sequences
return sail.pathGraph().nodeCount() * 10; // We really prefer to go linear over all sequences
} else if (RDF.TYPE.equals(predicate)) {
return sail.pathGraph().nodeCount();
} else if (linkPredicates.contains(predicate)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.flatMap;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.map;
import static io.github.jervenbolleman.handlegraph4j.iterators.AutoClosedIterator.of;
import static swiss.sib.swissprot.sapfhir.statements.StatementProvider.filter;
import static swiss.sib.swissprot.sapfhir.statements.StatementProvider.pathIriFromIri;

import org.eclipse.rdf4j.model.BNode;
Expand Down Expand Up @@ -79,9 +78,8 @@ public AutoClosedIterator<Statement> getStatements(Resource subject, IRI predica
return empty();
} else if (subject == null) {
AutoClosedIterator<P> paths = sail.pathGraph().paths();
var iris = map(paths, p -> new PathIRI<P>(p, sail));
var stats = map(iris, p -> this.getStatements(p, predicate, object));
return flatMap(stats);
var iris = map(paths, p -> this.getStatements(new PathIRI<P>(p, sail), predicate, object));
return flatMap(iris);
} else if (subject instanceof IRI) {
PathIRI<P> pathIRI = pathIriFromIri((IRI) subject, sail);
if (pathIRI == null) {
Expand All @@ -91,9 +89,8 @@ public AutoClosedIterator<Statement> getStatements(Resource subject, IRI predica
} else if (RDFS.LABEL.equals(predicate)) {
return knownSubjectLabelStatements(pathIRI, object);
} else {
var of = concat(knownSubjectTypeStatements(pathIRI, object),
return concat(knownSubjectTypeStatements(pathIRI, object),
knownSubjectLabelStatements(pathIRI, object));
return of;
}
} else {
return empty();
Expand All @@ -103,10 +100,12 @@ public AutoClosedIterator<Statement> getStatements(Resource subject, IRI predica
private AutoClosedIterator<Statement> knownSubjectTypeStatements(PathIRI<P> pathIRI, Value object) {
if (object instanceof BNode || object instanceof Literal) {
return empty();
} else if (object == null || VG.Path.equals(object)) {
Statement stat = new UnsafeStatement(pathIRI, RDF.TYPE, VG.Path);
return of(stat);
} else {
return empty();
}
Statement stat = new UnsafeStatement(pathIRI, RDF.TYPE, VG.Path);
AutoClosedIterator<Statement> stream = of(stat);
return filter(object, stream);
}

private AutoClosedIterator<Statement> knownSubjectLabelStatements(PathIRI<P> pathIRI, Value object) {
Expand All @@ -115,10 +114,12 @@ private AutoClosedIterator<Statement> knownSubjectLabelStatements(PathIRI<P> pat
}
String nameOfPath = sail.pathGraph().nameOfPath(pathIRI.path());
Literal label = sail.getValueFactory().createLiteral(nameOfPath);
Statement stat = new UnsafeStatement(pathIRI, RDFS.LABEL, label);

AutoClosedIterator<Statement> stream = of(stat);
return filter(object, stream);
if (object == null || label.equals(object)) {
Statement stat = new UnsafeStatement(pathIRI, RDFS.LABEL, label);
return of(stat);
} else {
return empty();
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ static <P extends PathHandle, S extends StepHandle> StepIRI<P> stepIriFromIri(IR
* @param sail the backing sail in which the node should be found.
* @return a NodeIRI or null
*/
static <N extends NodeHandle> NodeIRI<N> nodeIriFromIRI(IRI iri, PathHandleGraphSail<?, ?, N, ?> sail) {
static <N extends NodeHandle> NodeIRI<N> nodeIriFromIri(IRI iri, PathHandleGraphSail<?, ?, N, ?> sail) {
if (iri == null) {
return null;
} else if (iri instanceof NodeIRI ni) {
Expand Down
Loading

0 comments on commit b203bf6

Please sign in to comment.