diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..b9217495 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,36 @@ +# EditorConfig: http://EditorConfig.org +# top-most EditorConfig file +root = true + +# all files defaults +[*] +# Unix-style newlines with a newline ending +end_of_line = lf +insert_final_newline = true +# Set default charset +charset = utf-8 +# 4 space indentation +indent_style = space +indent_size = 4 +# trim whitespaces +trim_trailing_whitespace = true +# always insert final newline +insert_final_newline = true + +[*.md] +trim_trailing_whitespace = false + +# tab for makefiles +[{Makefile, Makefile*, *.xml}] +indent_style = tab +indent_size = 4 + +# 2 spaces for Dockerfiles +[{Dockerfile, Dockerfile*}] +indent_style = space +indent_size = 2 + +# 2 spaces for md, yaml, ttl, etc.. +[*.{md,yml,iml,json,ttl,ts,js,html,css}] +indent_style = space +indent_size = 2 diff --git a/pom.xml b/pom.xml index e11c0a55..dfd549d2 100644 --- a/pom.xml +++ b/pom.xml @@ -1,158 +1,164 @@ - - 4.0.0 - org.aksw - gerbil.nif.transfer - 1.2.8 - NIF transfer library for the General Entity Annotator Benchmark - This project contains classes for transferring documents using NIF. + + 4.0.0 + org.aksw + gerbil.nif.transfer + 1.2.9 + NIF transfer library for the General Entity Annotator Benchmark + This project contains classes for transferring documents using + NIF. 
- - 1.8 - 1.7.6 - 4.11 - 3.1.0 - UTF-8 - + + 1.8 + 1.7.6 + 4.11 + 3.1.0 + UTF-8 + - - - maven.aksw.internal - University Leipzig, AKSW Maven2 Repository - https://maven.aksw.org/repository/internal - - - maven.aksw.snapshots - University Leipzig, AKSW Maven2 Repository - https://maven.aksw.org/repository/snapshots - - + + + maven.aksw.internal + University Leipzig, AKSW Maven2 Repository + https://maven.aksw.org/repository/internal + + + maven.aksw.snapshots + University Leipzig, AKSW Maven2 Repository + https://maven.aksw.org/repository/snapshots + + - - - - org.apache.jena - jena-core - ${jena.version} - - - org.apache.jena - jena-arq - ${jena.version} - + + + + org.apache.jena + jena-core + ${jena.version} + + + org.apache.jena + jena-arq + ${jena.version} + - - commons-io - commons-io - 2.4 - + + commons-io + commons-io + 2.4 + - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - org.slf4j - slf4j-log4j12 - ${slf4j.version} - + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + - - - junit - junit - ${junit.version} - test - - + + + junit + junit + ${junit.version} + test + + - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.1 - - ${java.version} - ${java.version} - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.10.1 - - private - true - - - - package - - jar - - - - - - - org.apache.maven.plugins - maven-source-plugin - 2.4 - - - attach-sources - - jar - - - - - - - com.mycila - license-maven-plugin - 2.6 - -
com/mycila/maven/plugin/license/templates/LGPL-3.txt
- - ${project.inceptionYear} - usbeck@informatik.uni-leipzig.de - - - **/README - **/LICENSE - **/install_to_gerbil.sh - diagrams/** - repository/** - gerbil_data/** - src/test/resources/** - src/main/resources/** - -
-
-
-
+ + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + ${java.version} + ${java.version} + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.1 + + private + true + + + + package + + jar + + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.4 + + + attach-sources + + jar + + + + + + + com.mycila + license-maven-plugin + 2.6 + +
com/mycila/maven/plugin/license/templates/LGPL-3.txt
+ + ${project.inceptionYear} + usbeck@informatik.uni-leipzig.de + + + **/README + **/LICENSE + **/install_to_gerbil.sh + diagrams/** + repository/** + gerbil_data/** + src/test/resources/** + src/main/resources/** + +
+
+
+
diff --git a/src/main/java/org/aksw/gerbil/io/nif/AnnotationParser.java b/src/main/java/org/aksw/gerbil/io/nif/AnnotationParser.java index b5e1bddd..cbc3fd2b 100644 --- a/src/main/java/org/aksw/gerbil/io/nif/AnnotationParser.java +++ b/src/main/java/org/aksw/gerbil/io/nif/AnnotationParser.java @@ -29,380 +29,294 @@ import org.aksw.gerbil.transfer.nif.Document; import org.aksw.gerbil.transfer.nif.Marking; -import org.aksw.gerbil.transfer.nif.MeaningSpan; +import org.aksw.gerbil.transfer.nif.MarkingBuilder; import org.aksw.gerbil.transfer.nif.ProvenanceInfo; -import org.aksw.gerbil.transfer.nif.Relation; -import org.aksw.gerbil.transfer.nif.data.Annotation; -import org.aksw.gerbil.transfer.nif.data.NamedEntity; import org.aksw.gerbil.transfer.nif.data.ProvenanceInfoImpl; -import org.aksw.gerbil.transfer.nif.data.RelationImpl; -import org.aksw.gerbil.transfer.nif.data.ScoredAnnotation; -import org.aksw.gerbil.transfer.nif.data.ScoredNamedEntity; -import org.aksw.gerbil.transfer.nif.data.ScoredRelationImpl; -import org.aksw.gerbil.transfer.nif.data.ScoredTypedNamedEntity; -import org.aksw.gerbil.transfer.nif.data.SpanImpl; -import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity; -import org.aksw.gerbil.transfer.nif.data.TypedSpanImpl; import org.aksw.gerbil.transfer.nif.vocabulary.ITSRDF; import org.aksw.gerbil.transfer.nif.vocabulary.NIF; import org.aksw.gerbil.transfer.nif.vocabulary.OA; import org.aksw.gerbil.transfer.nif.vocabulary.PROV; import org.apache.jena.datatypes.xsd.XSDDatatype; import org.apache.jena.datatypes.xsd.XSDDateTime; -import org.apache.jena.graph.Node; import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.NodeIterator; import org.apache.jena.rdf.model.Property; +import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.ResIterator; import org.apache.jena.rdf.model.Resource; +import org.apache.jena.util.iterator.ExtendedIterator; import org.apache.jena.vocabulary.RDF; 
import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class AnnotationParser { - private static final Logger LOGGER = LoggerFactory.getLogger(AnnotationParser.class); + private static final Logger LOGGER = LoggerFactory.getLogger(AnnotationParser.class); - private boolean removeUsedProperties; + private boolean removeUsedProperties; - public AnnotationParser() { - this(false); - } + public AnnotationParser() { + this(false); + } - public AnnotationParser(final boolean removeUsedProperties) { - this.removeUsedProperties = removeUsedProperties; - } + public AnnotationParser(final boolean removeUsedProperties) { + this.removeUsedProperties = removeUsedProperties; + } - public void parseAnnotations(final Model nifModel, final Document document, final Resource documentResource) { - // get the annotations from the model + public void parseAnnotations(final Model nifModel, final Document document, final Resource documentResource) { + // get the annotations from the model - List markings = document.getMarkings(); - ResIterator resIter = nifModel.listSubjectsWithProperty(RDF.type, PROV.Activity); - Map provenanceInfos = new HashMap(); - Resource annotationResource; - NodeIterator nodeIter; - while (resIter.hasNext()) { - annotationResource = resIter.next(); - Calendar startedAt = getDateTimeValue(nifModel, annotationResource, PROV.startedAtTime); - Calendar endedAt = getDateTimeValue(nifModel, annotationResource, PROV.endedAtTime); - Set agents = null; + List markings = document.getMarkings(); + ResIterator resIter = nifModel.listSubjectsWithProperty(RDF.type, PROV.Activity); + Map provenanceInfos = new HashMap(); + Resource annotationResource; + while (resIter.hasNext()) { + annotationResource = resIter.next(); + Calendar startedAt = getDateTimeValue(nifModel, annotationResource, PROV.startedAtTime); + Calendar endedAt = getDateTimeValue(nifModel, annotationResource, PROV.endedAtTime); + Set agents = null; - resIter = 
nifModel.listSubjectsWithProperty(NIF.referenceContext, documentResource); - while (resIter.hasNext()) { - if (agents == null) { - agents = new HashSet<>(); - } - agents.add(resIter.next().toString()); - } - provenanceInfos.put(annotationResource.getURI(), new ProvenanceInfoImpl(startedAt, endedAt, agents)); - } + resIter = nifModel.listSubjectsWithProperty(NIF.referenceContext, documentResource); + while (resIter.hasNext()) { + if (agents == null) { + agents = new HashSet<>(); + } + agents.add(resIter.next().toString()); + } + provenanceInfos.put(annotationResource.getURI(), new ProvenanceInfoImpl(startedAt, endedAt, agents)); + } + // parse annotations pointing to the document + resIter = nifModel.listSubjectsWithProperty(NIF.referenceContext, documentResource); + parseNifAnnotations(nifModel, resIter, provenanceInfos, markings); + // parse annotations to which the document points to + parseNifAnnotations(nifModel, nifModel.listObjectsOfProperty(documentResource, NIF.topic), provenanceInfos, + markings); + parseOAAnnotations(nifModel, documentResource, provenanceInfos, markings); + } - Set usedProvInfos = new HashSet<>(); - int start, end; - Set entityUris; - double confidence; - resIter = nifModel.listSubjectsWithProperty(NIF.referenceContext, documentResource); - ProvenanceInfo localProv; - Marking marking; - while (resIter.hasNext()) { + protected void parseNifAnnotations(Model nifModel, ExtendedIterator iterator, + Map provenanceInfos, List markings) { + Resource annotationResource; + NodeIterator nodeIter; + MarkingBuilder builder = Marking.builder(); + while (iterator.hasNext()) { + builder.clear(); + annotationResource = iterator.next().asResource(); - annotationResource = resIter.next(); + builder.setProvenance(findProvenance(nifModel, annotationResource, provenanceInfos)); - marking = null; - localProv = null; - nodeIter = nifModel.listObjectsOfProperty(annotationResource, PROV.wasGeneratedBy); - if (nodeIter.hasNext()) { - String provUri = 
nodeIter.next().asResource().getURI(); - if (provenanceInfos.containsKey(provUri)) { - localProv = provenanceInfos.get(provUri); - } else { - LOGGER.warn("Found a link to a non existing provenance information \"{}\". It will be ignored", - provUri); - } - } - if (localProv == null) { - ResIterator resIter2 = nifModel.listResourcesWithProperty(PROV.generated, annotationResource); - if (resIter2.hasNext()) { - String provUri = resIter2.next().getURI(); - if (provenanceInfos.containsKey(provUri)) { - localProv = provenanceInfos.get(provUri); - } else { - LOGGER.warn("Found a link to a non existing provenance information \"{}\". It will be ignored", - provUri); - } - } - } + nodeIter = nifModel.listObjectsOfProperty(annotationResource, NIF.beginIndex); + if (nodeIter.hasNext()) { + builder.setStart(nodeIter.next().asLiteral().getInt()); + } + nodeIter = nifModel.listObjectsOfProperty(annotationResource, NIF.endIndex); + if (nodeIter.hasNext()) { + builder.setEnd(nodeIter.next().asLiteral().getInt()); + } + // nif:Word is not really used + // boolean isWord = nifModel.contains(annotationResource, RDF.type, NIF.Word); + nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taIdentRef); + if (nodeIter.hasNext()) { + Resource meaning; + while (nodeIter.hasNext()) { + meaning = nodeIter.next().asResource(); + builder.addMeaning(meaning.getURI()); + addTypeInformation(nifModel, meaning, builder); + } + } + nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taClassRef); + while (nodeIter.hasNext()) { + builder.addType(nodeIter.next().toString()); + } + nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taConfidence); + if (nodeIter.hasNext()) { + builder.setConfidence(nodeIter.next().asLiteral().getDouble()); + } + // Check whether it is a relation + if (nifModel.contains(annotationResource, RDF.type, RDF.Statement)) { + nodeIter = nifModel.listObjectsOfProperty(annotationResource, RDF.subject); + if (nodeIter.hasNext()) { + 
builder.addSubject(nodeIter.next().asNode().toString()); + } + nodeIter = nifModel.listObjectsOfProperty(annotationResource, RDF.predicate); + if (nodeIter.hasNext()) { + builder.addPredicate(nodeIter.next().asNode().toString()); + } + nodeIter = nifModel.listObjectsOfProperty(annotationResource, RDF.object); + if (nodeIter.hasNext()) { + builder.addObject(nodeIter.next().asNode().toString()); + } + } + Marking marking = builder.build(); + if (marking != null) { + markings.add(marking); + } else { + // The annotation is incomplete + LOGGER.warn("Found an incomplete annotation resource (\"" + annotationResource.getURI() + + "\"). This annotation will be ignored."); + } + if (removeUsedProperties) { + nifModel.removeAll(annotationResource, null, null); + } + } + } - start = end = -1; - nodeIter = nifModel.listObjectsOfProperty(annotationResource, NIF.beginIndex); - if (nodeIter.hasNext()) { - start = nodeIter.next().asLiteral().getInt(); - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, NIF.endIndex); - if (nodeIter.hasNext()) { - end = nodeIter.next().asLiteral().getInt(); - } - if ((start >= 0) && (end >= 0)) { - boolean isWord = nifModel.contains(annotationResource, RDF.type, NIF.Word); + protected ProvenanceInfo findProvenance(Model nifModel, Resource resource, + Map provenanceInfos) { + NodeIterator nodeIter = nifModel.listObjectsOfProperty(resource, PROV.wasGeneratedBy); + if (nodeIter.hasNext()) { + String provUri = nodeIter.next().asResource().getURI(); + if (provenanceInfos.containsKey(provUri)) { + return provenanceInfos.get(provUri); + } else { + LOGGER.warn("Found a link to a non existing provenance information \"{}\". 
It will be ignored", + provUri); + } + } + ResIterator resIter = nifModel.listResourcesWithProperty(PROV.generated, resource); + if (resIter.hasNext()) { + String provUri = resIter.next().getURI(); + if (provenanceInfos.containsKey(provUri)) { + return provenanceInfos.get(provUri); + } else { + LOGGER.warn("Found a link to a non existing provenance information \"{}\". It will be ignored", + provUri); + } + } + return null; + } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taIdentRef); - if (nodeIter.hasNext()) { - entityUris = new HashSet<>(); - while (nodeIter.hasNext()) { - entityUris.add(nodeIter.next().toString()); - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taClassRef); - if (nodeIter.hasNext()) { - Set types = new HashSet<>(); - while (nodeIter.hasNext()) { - types.add(nodeIter.next().toString()); - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taConfidence); - if (nodeIter.hasNext()) { - confidence = nodeIter.next().asLiteral().getDouble(); - marking = addTypeInformation(new ScoredTypedNamedEntity(start, end - start, entityUris, - types, confidence, isWord), nifModel); - } else { - // It has been typed without a confidence - marking = addTypeInformation( - new TypedNamedEntity(start, end - start, entityUris, types, isWord), nifModel); - } - } else { - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taConfidence); - if (nodeIter.hasNext()) { - confidence = nodeIter.next().asLiteral().getDouble(); - marking = addTypeInformationIfPossible( - new ScoredNamedEntity(start, end - start, entityUris, confidence, isWord), - nifModel); - } else { - // It has been disambiguated without a confidence - marking = addTypeInformationIfPossible( - new NamedEntity(start, end - start, entityUris, isWord), nifModel); - } - } - } else { - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taClassRef); - if (nodeIter.hasNext()) { - Set types = new HashSet<>(); 
- while (nodeIter.hasNext()) { - types.add(nodeIter.next().toString()); - } - // It has been typed without a confidence - marking = new TypedSpanImpl(start, end - start, types, isWord); - } else { - // It is a named entity that hasn't been disambiguated - marking = new SpanImpl(start, end - start, isWord); - } - } - // FIXME scored Span is missing - } else { - // Check whether it is a relation - if (nifModel.contains(annotationResource, RDF.type, RDF.Statement)) { - Node s = null, p = null, o = null; - nodeIter = nifModel.listObjectsOfProperty(annotationResource, RDF.subject); - if (nodeIter.hasNext()) { - s = nodeIter.next().asNode(); - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, RDF.predicate); - if (nodeIter.hasNext()) { - p = nodeIter.next().asNode(); - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, RDF.object); - if (nodeIter.hasNext()) { - o = nodeIter.next().asNode(); - } - if ((s != null) && (p != null) && (o != null)) { - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taConfidence); - if (nodeIter.hasNext()) { - confidence = nodeIter.next().asLiteral().getDouble(); - marking = new ScoredRelationImpl(new Annotation(s.toString()), - new Annotation(p.toString()), - new Annotation(o.toString()), confidence); - } else { - // It has been disambiguated without a confidence - marking = new RelationImpl(new Annotation(s.toString()), new Annotation(p.toString()), new Annotation(o.toString())); - } - } else { - // The relation is incomplete - LOGGER.warn("Found an incomplete relation resource (\"" + annotationResource.getURI() - + "\") with a missing subject, predicate or object. This annotation will be ignored."); - } - } else { - LOGGER.warn("Found an annotation resource (\"" + annotationResource.getURI() - + "\") without a start or end index. 
This annotation will be ignored."); - } - } - if (marking != null) { - if (localProv != null) { - marking.setProvenanceInfo(localProv); - usedProvInfos.add(localProv); - } - markings.add(marking); - } - if (removeUsedProperties) { - nifModel.removeAll(annotationResource, null, null); - } - } + protected void parseOAAnnotations(Model nifModel, Resource documentResource, + Map provenanceInfos, List markings) { + ResIterator resIter = nifModel.listSubjectsWithProperty(OA.hasSource, documentResource); + MarkingBuilder builder = Marking.builder(); + Marking marking = null; + while (resIter.hasNext()) { + // Subject is blank node object for hasTarget + ResIterator sourceIter = nifModel.listSubjectsWithProperty(OA.hasTarget, resIter.next()); + while (sourceIter.hasNext()) { + // Subject is blank node for one relation annotation + Resource relationStmtNode = sourceIter.next(); + if (nifModel.contains(relationStmtNode, RDF.type, RDF.Statement)) { + builder.clear(); + // get statements + builder.addSubject( + nifModel.listObjectsOfProperty(relationStmtNode, RDF.subject).next().asNode().toString()); + builder.addPredicate( + nifModel.listObjectsOfProperty(relationStmtNode, RDF.predicate).next().asNode().toString()); + builder.addObject( + nifModel.listObjectsOfProperty(relationStmtNode, RDF.object).next().asNode().toString()); + marking = builder.build(); + if (marking != null) { + markings.add(marking); + } + } + } + } + } - NodeIterator annotationIter = nifModel.listObjectsOfProperty(documentResource, NIF.topic); - while (annotationIter.hasNext()) { - marking = null; - localProv = null; - annotationResource = annotationIter.next().asResource(); - nodeIter = nifModel.listObjectsOfProperty(annotationResource, PROV.wasGeneratedBy); - if (nodeIter.hasNext()) { - String provUri = nodeIter.next().asResource().getURI(); - if (provenanceInfos.containsKey(provUri)) { - localProv = provenanceInfos.get(provUri); - } else { - LOGGER.warn("Found a link to a non existing provenance 
information \"{}\". It will be ignored", - provUri); - } - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taIdentRef); - if (nodeIter.hasNext()) { - entityUris = new HashSet<>(); - while (nodeIter.hasNext()) { - entityUris.add(nodeIter.next().toString()); - } - nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taConfidence); - if (nodeIter.hasNext()) { - confidence = nodeIter.next().asLiteral().getDouble(); - marking = new ScoredAnnotation(entityUris, confidence); - } else { - marking = new Annotation(entityUris); - } - } - if (marking != null) { - if (localProv != null) { - marking.setProvenanceInfo(localProv); - usedProvInfos.add(localProv); - } - markings.add(marking); - } - } - resIter = nifModel.listSubjectsWithProperty(OA.hasSource, documentResource); - while (resIter.hasNext()) { - // Subject is blank node object for hasTarget - ResIterator sourceIter = nifModel.listSubjectsWithProperty(OA.hasTarget, resIter.next()); - while (sourceIter.hasNext()) { - // Subject is blank node for one relation annotation - Resource relationStmtNode = sourceIter.next(); - if (nifModel.contains(relationStmtNode, RDF.type, RDF.Statement)) { - //get statements - Node subject = nifModel.listObjectsOfProperty(relationStmtNode, RDF.subject).next().asNode(); - Node predicate = nifModel.listObjectsOfProperty(relationStmtNode, RDF.predicate).next().asNode(); - Node object = nifModel.listObjectsOfProperty(relationStmtNode, RDF.object).next().asNode(); - Relation relation = new RelationImpl(new Annotation(subject.toString()), - new Annotation(predicate.toString()), - new Annotation(object.toString())); - markings.add(relation); - } - - } - } - markings.addAll(usedProvInfos); - } +// protected void parseDirectAnnotations(Model nifModel, Resource documentResource, +// Map provenanceInfos, List markings) { +// NodeIterator annotationIter = nifModel.listObjectsOfProperty(documentResource, NIF.topic); +// while (annotationIter.hasNext()) { +// 
marking = null; +// localProv = null; +// annotationResource = annotationIter.next().asResource(); +// nodeIter = nifModel.listObjectsOfProperty(annotationResource, PROV.wasGeneratedBy); +// if (nodeIter.hasNext()) { +// String provUri = nodeIter.next().asResource().getURI(); +// if (provenanceInfos.containsKey(provUri)) { +// localProv = provenanceInfos.get(provUri); +// } else { +// LOGGER.warn("Found a link to a non existing provenance information \"{}\". It will be ignored", +// provUri); +// } +// } +// nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taIdentRef); +// if (nodeIter.hasNext()) { +// entityUris = new HashSet<>(); +// while (nodeIter.hasNext()) { +// entityUris.add(nodeIter.next().toString()); +// } +// nodeIter = nifModel.listObjectsOfProperty(annotationResource, ITSRDF.taConfidence); +// if (nodeIter.hasNext()) { +// confidence = nodeIter.next().asLiteral().getDouble(); +// marking = new ScoredAnnotation(entityUris, confidence); +// } else { +// marking = new Annotation(entityUris); +// } +// } +// } +// markings.addAll(usedProvInfos); +// } - private MeaningSpan addTypeInformationIfPossible(final NamedEntity ne, final Model nifModel) { - TypedNamedEntity typedNE = new TypedNamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris(), - new HashSet(), ne.getIsWord()); - addTypeInformation(typedNE, nifModel); - if (typedNE.getTypes().size() > 0) { - return typedNE; - } else { - return ne; - } - } + protected void addTypeInformation(final Model nifModel, Resource meaning, MarkingBuilder builder) { + NodeIterator nodeIter = nifModel.listObjectsOfProperty(meaning, RDF.type); + while (nodeIter.hasNext()) { + builder.addType(nodeIter.next().asResource().getURI()); + } + } - private MeaningSpan addTypeInformationIfPossible(final ScoredNamedEntity ne, final Model nifModel) { - ScoredTypedNamedEntity typedNE = new ScoredTypedNamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris(), - new HashSet(), ne.getConfidence(), 
ne.getIsWord()); - addTypeInformation(typedNE, nifModel); - if (typedNE.getTypes().size() > 0) { - return typedNE; - } else { - return ne; - } - } + /** + * Returns the object as {@link Calendar} of the first triple that has the given + * subject and predicate and that can be found in the given model. + * + * @param model the model that should contain the triple + * @param subject the subject of the triple. null works like a + * wildcard. + * @param predicate the predicate of the triple. null works like a + * wildcard. + * @return object of the triple as {@link Calendar} or null if such + * a triple couldn't be found or the value can not be read as XSDDate + */ + public static Calendar getDateValue(Model model, Resource subject, Property predicate) { + Calendar result = getCalendarValue(model, subject, predicate, XSDDatatype.XSDdate); + if (result != null) { + result.setTimeZone(TimeZone.getDefault()); + } + return result; + } - private TypedNamedEntity addTypeInformation(final TypedNamedEntity typedNE, final Model nifModel) { - for (String uri : typedNE.getUris()) { - NodeIterator nodeIter = nifModel.listObjectsOfProperty(nifModel.getResource(uri), RDF.type); - Set types = typedNE.getTypes(); - while (nodeIter.hasNext()) { - types.add(nodeIter.next().asResource().getURI()); - } - } - return typedNE; - } + /** + * Returns the object as {@link Calendar} of the first triple that has the given + * subject and predicate and that can be found in the given model. + * + * @param model the model that should contain the triple + * @param subject the subject of the triple. null works like a + * wildcard. + * @param predicate the predicate of the triple. null works like a + * wildcard. 
+ * @return object of the triple as {@link Calendar} or null if such + * a triple couldn't be found or the value can not be read as + * XSDDateTime + */ + public static Calendar getDateTimeValue(Model model, Resource subject, Property predicate) { + return getCalendarValue(model, subject, predicate, XSDDatatype.XSDdateTime); + } - /** - * Returns the object as {@link Calendar} of the first triple that has the given - * subject and predicate and that can be found in the given model. - * - * @param model - * the model that should contain the triple - * @param subject - * the subject of the triple. null works like a - * wildcard. - * @param predicate - * the predicate of the triple. null works like a - * wildcard. - * @return object of the triple as {@link Calendar} or null if such - * a triple couldn't be found or the value can not be read as XSDDate - */ - public static Calendar getDateValue(Model model, Resource subject, Property predicate) { - Calendar result = getCalendarValue(model, subject, predicate, XSDDatatype.XSDdate); - if (result != null) { - result.setTimeZone(TimeZone.getDefault()); - } - return result; - } - - /** - * Returns the object as {@link Calendar} of the first triple that has the given - * subject and predicate and that can be found in the given model. - * - * @param model - * the model that should contain the triple - * @param subject - * the subject of the triple. null works like a - * wildcard. - * @param predicate - * the predicate of the triple. null works like a - * wildcard. 
- * @return object of the triple as {@link Calendar} or null if such - * a triple couldn't be found or the value can not be read as - * XSDDateTime - */ - public static Calendar getDateTimeValue(Model model, Resource subject, Property predicate) { - return getCalendarValue(model, subject, predicate, XSDDatatype.XSDdateTime); - } - - protected static Calendar getCalendarValue(Model model, Resource subject, Property predicate, - XSDDatatype dateType) { - if (model == null) { - return null; - } - Literal literal = null; - NodeIterator nodeIter = model.listObjectsOfProperty(subject, predicate); - if (nodeIter.hasNext()) { - literal = nodeIter.next().asLiteral(); - } - if (literal != null) { - try { - Object o = dateType.parse(literal.getString()); - if (o instanceof XSDDateTime) { - return ((XSDDateTime) o).asCalendar(); - } - } catch (Exception e) { - // nothing to do - LOGGER.debug("Couldn't parse " + dateType.getURI() + ". Returning null.", e); - } - } - return null; - } + protected static Calendar getCalendarValue(Model model, Resource subject, Property predicate, + XSDDatatype dateType) { + if (model == null) { + return null; + } + Literal literal = null; + NodeIterator nodeIter = model.listObjectsOfProperty(subject, predicate); + if (nodeIter.hasNext()) { + literal = nodeIter.next().asLiteral(); + } + if (literal != null) { + try { + Object o = dateType.parse(literal.getString()); + if (o instanceof XSDDateTime) { + return ((XSDDateTime) o).asCalendar(); + } + } catch (Exception e) { + // nothing to do + LOGGER.debug("Couldn't parse " + dateType.getURI() + ". 
Returning null.", e); + } + } + return null; + } } diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/AbstractNIFDocumentCreator.java b/src/main/java/org/aksw/gerbil/transfer/nif/AbstractNIFDocumentCreator.java old mode 100755 new mode 100644 diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/AbstractNIFDocumentParser.java b/src/main/java/org/aksw/gerbil/transfer/nif/AbstractNIFDocumentParser.java old mode 100755 new mode 100644 diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/Marking.java b/src/main/java/org/aksw/gerbil/transfer/nif/Marking.java index 29ed38f2..e89df926 100644 --- a/src/main/java/org/aksw/gerbil/transfer/nif/Marking.java +++ b/src/main/java/org/aksw/gerbil/transfer/nif/Marking.java @@ -27,10 +27,14 @@ public interface Marking extends Cloneable { @Override public String toString(); - + public Object clone() throws CloneNotSupportedException; public void setProvenanceInfo(ProvenanceInfo provencance); - + public ProvenanceInfo getProvenanceInfo(); + + public static MarkingBuilder builder() { + return new MarkingBuilder(); + } } diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/MarkingBuilder.java b/src/main/java/org/aksw/gerbil/transfer/nif/MarkingBuilder.java new file mode 100644 index 00000000..7fbd2024 --- /dev/null +++ b/src/main/java/org/aksw/gerbil/transfer/nif/MarkingBuilder.java @@ -0,0 +1,342 @@ +package org.aksw.gerbil.transfer.nif; + +import java.util.HashSet; +import java.util.Set; + +import org.aksw.gerbil.transfer.nif.data.Annotation; +import org.aksw.gerbil.transfer.nif.data.NamedEntity; +import org.aksw.gerbil.transfer.nif.data.RelationImpl; +import org.aksw.gerbil.transfer.nif.data.ScoredAnnotation; +import org.aksw.gerbil.transfer.nif.data.ScoredNamedEntity; +import org.aksw.gerbil.transfer.nif.data.ScoredRelationImpl; +import org.aksw.gerbil.transfer.nif.data.ScoredSpanImpl; +import org.aksw.gerbil.transfer.nif.data.ScoredTypedNamedEntity; +import org.aksw.gerbil.transfer.nif.data.SpanImpl; +import 
org.aksw.gerbil.transfer.nif.data.TypedNamedEntity; +import org.aksw.gerbil.transfer.nif.data.TypedSpanImpl; +import org.apache.commons.lang3.NotImplementedException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MarkingBuilder { + + private static final Logger LOGGER = LoggerFactory.getLogger(MarkingBuilder.class); + + protected Set meanings; + protected Set types; + protected int start = -1; + protected int length = -1; + protected int end = -1; + protected double confidence; + protected boolean hasConfidence = false; + protected ProvenanceInfo provenance = null; + protected Set subject = null; + protected Set predicate = null; + protected Set object = null; + + public void clear() { + meanings = null; + types = null; + start = -1; + length = -1; + end = -1; + hasConfidence = false; + provenance = null; + subject = null; + predicate = null; + object = null; + } + + /** + * @return the meanings + */ + public Set getMeanings() { + return meanings; + } + + /** + * @param meanings the meanings to set + */ + public void setMeanings(Set meanings) { + this.meanings = meanings; + } + + /** + * @param meaning the meaning to be added + */ + public void addMeaning(String meaning) { + if (meanings == null) { + meanings = new HashSet<>(); + } + meanings.add(meaning); + } + + /** + * @return the types + */ + public Set getTypes() { + return types; + } + + /** + * @param types the types to set + */ + public void setTypes(Set types) { + this.types = types; + } + + /** + * @param type the type to be added + */ + public void addType(String type) { + if (types == null) { + types = new HashSet<>(); + } + types.add(type); + } + + /** + * @return the start + */ + public int getStart() { + return start; + } + + /** + * @param start the start to set + */ + public void setStart(int start) { + this.start = start; + } + + /** + * @return the length + */ + public int getLength() { + return length; + } + + /** + * @param length the length to set + */ + public
void setLength(int length) { + this.length = length; + } + + /** + * @return the end + */ + public int getEnd() { + return end; + } + + /** + * @param end the end to set + */ + public void setEnd(int end) { + this.end = end; + } + + /** + * @return the confidence + */ + public double getConfidence() { + return confidence; + } + + /** + * @return the hasConfidence flag + */ + public boolean hasConfidence() { + return hasConfidence; + } + + /** + * @param confidence the confidence to set + */ + public void setConfidence(double confidence) { + this.hasConfidence = true; + this.confidence = confidence; + } + + /** + * @return the provenance + */ + public ProvenanceInfo getProvenance() { + return provenance; + } + + /** + * @param provenance the provenance to set + */ + public void setProvenance(ProvenanceInfo provenance) { + this.provenance = provenance; + } + + /** + * @return the subject + */ + public Set getSubject() { + return subject; + } + + /** + * @param subject the subject to set + */ + public void setSubject(Set subject) { + this.subject = subject; + } + + /** + * @param subject the subject to be added + */ + public void addSubject(String subject) { + if (this.subject == null) { + this.subject = new HashSet<>(); + } + this.subject.add(subject); + } + + /** + * @return the predicate + */ + public Set getPredicate() { + return predicate; + } + + /** + * @param predicate the predicate to set + */ + public void setPredicate(Set predicate) { + this.predicate = predicate; + } + + /** + * @param predicate the predicate to be added + */ + public void addPredicate(String predicate) { + if (this.predicate == null) { + this.predicate = new HashSet<>(); + } + this.predicate.add(predicate); + } + + /** + * @return the object + */ + public Set getObject() { + return object; + } + + /** + * @param object the object to set + */ + public void setObject(Set object) { + this.object = object; + } + + /** + * @param object the object to be added + */ + public void 
addObject(String object) { + if (this.object == null) { + this.object = new HashSet<>(); + } + this.object.add(object); + } + + public Marking build() { + Marking createdMarking = null; + // If this is a relation + if ((subject != null) && (predicate != null) && (object != null)) { + createdMarking = buildRelation(); + } + // If this marking has a meaning + if (meanings != null) { + // If there is a type + if (types != null) { + if ((start >= 0) && ((length >= 0) || (end >= 0))) { + // Calculate length if not present + if (length < 0) { + length = end - start; + } + if (hasConfidence) { + createdMarking = new ScoredTypedNamedEntity(start, length, meanings, types, confidence); + } else { + createdMarking = new TypedNamedEntity(start, length, meanings, types); + } + } else { + // We don't have this type of marking + throw new NotImplementedException( + "There is no implementation for a typed meaning without positional information."); + } + } else { + if ((start >= 0) && ((length >= 0) || (end >= 0))) { + // Calculate length if not present + if (length < 0) { + length = end - start; + } + if (hasConfidence) { + createdMarking = new ScoredNamedEntity(start, length, meanings, confidence); + } else { + createdMarking = new NamedEntity(start, length, meanings); + } + } else { + if (hasConfidence) { + createdMarking = new ScoredAnnotation(meanings, confidence); + } else { + createdMarking = new Annotation(meanings); + } + } + } // has type + } else { + // There is no meaning + if (types != null) { + if ((start >= 0) && ((length >= 0) || (end >= 0))) { + // Calculate length if not present + if (length < 0) { + length = end - start; + } + if (hasConfidence) { + // We don't have this type of marking + throw new NotImplementedException("There is no implementation for a ScoredTypedSpan."); + } else { + createdMarking = new TypedSpanImpl(start, length, types); + } + } else { + // We don't have this type of marking + throw new NotImplementedException( + "There is no 
implementation for a typed marking without positional information."); + } + } else { + if ((start >= 0) && ((length >= 0) || (end >= 0))) { + // Calculate length if not present + if (length < 0) { + length = end - start; + } + if (hasConfidence) { + createdMarking = new ScoredSpanImpl(start, length, confidence); + } else { + createdMarking = new SpanImpl(start, length); + } + } else { + LOGGER.warn("Not enough information to create a Marking. Returning null."); + } + } // has type + } // has meaning + // If we have any provenance information + if ((createdMarking != null) && (provenance != null)) { + createdMarking.setProvenanceInfo(provenance); + } + return createdMarking; + } + + protected Relation buildRelation() { + if (hasConfidence) { + return new ScoredRelationImpl(new Annotation(subject), new Annotation(predicate), new Annotation(object), + confidence); + } else { + return new RelationImpl(new Annotation(subject), new Annotation(predicate), new Annotation(object)); + } + } +} diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/NIFDocumentCreator.java b/src/main/java/org/aksw/gerbil/transfer/nif/NIFDocumentCreator.java old mode 100755 new mode 100644 diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/NIFDocumentParser.java b/src/main/java/org/aksw/gerbil/transfer/nif/NIFDocumentParser.java old mode 100755 new mode 100644 diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/TurtleNIFDocumentCreator.java b/src/main/java/org/aksw/gerbil/transfer/nif/TurtleNIFDocumentCreator.java old mode 100755 new mode 100644 diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/TurtleNIFDocumentParser.java b/src/main/java/org/aksw/gerbil/transfer/nif/TurtleNIFDocumentParser.java old mode 100755 new mode 100644 diff --git a/src/main/java/org/aksw/gerbil/transfer/nif/data/ProvenanceInfoImpl.java b/src/main/java/org/aksw/gerbil/transfer/nif/data/ProvenanceInfoImpl.java index 18dd2442..ea5fcc8e 100644 --- 
a/src/main/java/org/aksw/gerbil/transfer/nif/data/ProvenanceInfoImpl.java +++ b/src/main/java/org/aksw/gerbil/transfer/nif/data/ProvenanceInfoImpl.java @@ -99,5 +99,5 @@ public String toString() { builder.append("]"); return builder.toString(); } - + }