Skip to content

Commit

Permalink
Implement #375 OWLZip reader and writer
Browse files Browse the repository at this point in the history
YAML format not included in version 5
  • Loading branch information
ignazio1977 committed Mar 24, 2018
1 parent d5d496b commit 0c677ae
Show file tree
Hide file tree
Showing 11 changed files with 958 additions and 53 deletions.
19 changes: 18 additions & 1 deletion api/src/main/java/org/semanticweb/owlapi/io/DocumentSources.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.JarURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
Expand Down Expand Up @@ -114,6 +116,15 @@ public static InputStream wrapInput(OWLOntologyDocumentSource source,
OWLOntologyLoaderConfiguration configuration) throws OWLOntologyInputSourceException {
Optional<InputStream> input = source.getInputStream();
if (!input.isPresent() && !source.hasAlredyFailedOnIRIResolution()) {
if (source.getDocumentIRI().getNamespace().startsWith("jar:")) {
try {
return streamFromJar(source.getDocumentIRI()).getInputStream();
} catch (IOException e) {
source.setIRIResolutionFailed(true);
throw new OWLParserException(e);
}
}

Optional<String> headers = source.getAcceptHeaders();
if (headers.isPresent()) {
input = getInputStream(source.getDocumentIRI(), configuration, headers.get());
Expand All @@ -127,6 +138,11 @@ public static InputStream wrapInput(OWLOntologyDocumentSource source,
throw new OWLOntologyInputSourceException("No input reader can be found");
}

/**
 * Opens a URL connection for the given document IRI and returns it as a jar connection.
 *
 * <p>The IRI's string form is used verbatim as the URL. The connection is cast to
 * {@link JarURLConnection}, so the IRI is expected to use the {@code jar:} scheme; any other
 * kind of connection makes the cast fail with a {@link ClassCastException}.
 *
 * @param documentIRI the IRI of the document to open
 * @return the opened connection, as a {@link JarURLConnection}
 * @throws MalformedURLException if the string form of the IRI is not a valid URL
 * @throws IOException if the connection cannot be opened
 */
protected static JarURLConnection streamFromJar(IRI documentIRI)
    throws IOException, MalformedURLException {
    URL location = new URL(documentIRI.toString());
    URLConnection connection = location.openConnection();
    return (JarURLConnection) connection;
}

/**
* A convenience method that obtains an input stream from a URI. This method sets up the correct
* request type and wraps the input stream within a buffered input stream.
Expand Down Expand Up @@ -171,7 +187,8 @@ public static Optional<InputStream> getInputStream(IRI documentIRI,
actualAcceptHeaders += LAST_REQUEST_TYPE;
}
conn.addRequestProperty("Accept", actualAcceptHeaders);
if (config.getAuthorizationValue() != null && !config.getAuthorizationValue().isEmpty()) {
if (config.getAuthorizationValue() != null
&& !config.getAuthorizationValue().isEmpty()) {
conn.setRequestProperty("Authorization", config.getAuthorizationValue());
}
if (config.isAcceptingHTTPCompression()) {
Expand Down
122 changes: 71 additions & 51 deletions api/src/main/java/org/semanticweb/owlapi/util/AutoIRIMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import static org.semanticweb.owlapi.util.OWLAPIPreconditions.checkNotNull;
import static org.semanticweb.owlapi.util.OWLAPIPreconditions.verifyNotNull;

import com.google.common.base.Splitter;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
Expand All @@ -27,13 +26,16 @@
import java.io.Reader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nullable;

import org.semanticweb.owlapi.annotations.HasPriority;
import org.semanticweb.owlapi.io.DocumentSources;
import org.semanticweb.owlapi.model.IRI;
Expand All @@ -46,10 +48,14 @@
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import com.google.common.base.Splitter;

/**
* A mapper which given a root folder attempts to automatically discover and map
* files to ontologies. The mapper is only capable of mapping ontologies in
* RDF/XML and OWL/XML (other serialisations are not supported).
* A mapper which given a root folder attempts to automatically discover and map files to
* ontologies. The mapper is capable of mapping ontologies in RDF/XML, OWL/XML, Manchester OWL
* Syntax, Functional Syntax and OBO (other serialisations are not supported). Zip and jar files
* containing ontologies are supported, either as main argument to the constructor or as content of
* the root folder.
*
* @author Matthew Horridge, The University Of Manchester, Bio-Health Informatics Group
* @since 2.0.0
Expand All @@ -59,7 +65,8 @@ public class AutoIRIMapper extends DefaultHandler implements OWLOntologyIRIMappe

static final Pattern pattern = Pattern.compile("Ontology\\(<([^>]+)>");
private static final Logger LOGGER = LoggerFactory.getLogger(AutoIRIMapper.class);
private final Set<String> fileExtensions = new HashSet<>();
private final Set<String> fileExtensions =
new HashSet<>(Arrays.asList(".owl", ".xml", ".rdf", ".omn", ".ofn"));
private final boolean recursive;
private final Map<String, OntologyRootElementHandler> handlerMap = createMap();
private final Map<IRI, IRI> ontologyIRI2PhysicalURIMap = createMap();
Expand All @@ -70,44 +77,48 @@ public class AutoIRIMapper extends DefaultHandler implements OWLOntologyIRIMappe
private transient File currentFile;

/**
* Creates an auto-mapper which examines ontologies that reside in the
* specified root folder (and possibly sub-folders).
* Creates an auto-mapper which examines ontologies that reside in the specified root folder
* (and possibly sub-folders).
*
* @param rootDirectory The root directory which should be searched for ontologies.
* @param rootDirectory The root directory which should be searched for ontologies; this can
* also be a zip/jar file containing ontologies. If root is actually a folder, zip/jar
* files included in the folder are parsed for ontologies. The zip parsing is delegated
* to ZipIRIMapper.
* @param recursive Sub directories will be searched recursively if {@code true}.
*/
public AutoIRIMapper(File rootDirectory, boolean recursive) {
directoryPath = checkNotNull(rootDirectory, "rootDirectory cannot be null")
.getAbsolutePath();
directoryPath =
checkNotNull(rootDirectory, "rootDirectory cannot be null").getAbsolutePath();
this.recursive = recursive;
fileExtensions.add(".owl");
fileExtensions.add(".xml");
fileExtensions.add(".rdf");
fileExtensions.add(".omn");
fileExtensions.add(".ofn");
mapped = false;
/**
* A handler to handle RDF/XML files. The xml:base (if present) is taken
* to be the ontology URI of the ontology document being parsed.
* A handler to handle RDF/XML files. The xml:base (if present) is taken to be the ontology
* URI of the ontology document being parsed.
*/
handlerMap.put(Namespaces.RDF + "RDF", attributes -> {
String baseValue = attributes.getValue(Namespaces.XML.toString(), "base");
if (baseValue == null) {
return null;
}
return IRI.create(baseValue);
});
handlerMap.put(Namespaces.RDF + "RDF", this::baseIRI);
/** A handler that can handle OWL/XML files. */
handlerMap.put(OWLXMLVocabulary.ONTOLOGY.toString(), attributes -> {
String ontURI = attributes.getValue(Namespaces.OWL.toString(), "ontologyIRI");
if (ontURI == null) {
ontURI = attributes.getValue("ontologyIRI");
}
if (ontURI == null) {
return null;
}
return IRI.create(ontURI);
});
handlerMap.put(OWLXMLVocabulary.ONTOLOGY.toString(), this::ontologyIRI);
}

/**
 * Extracts an ontology IRI from the attributes of an OWL/XML ontology element.
 *
 * <p>The {@code ontologyIRI} attribute is looked up first in the OWL namespace and, if that
 * yields nothing, by the name {@code ontologyIRI} without a namespace.
 *
 * @param attributes the attributes of the element being handled
 * @return the IRI built from the attribute value, or {@code null} if neither lookup finds one
 */
@Nullable
protected IRI ontologyIRI(Attributes attributes) {
    String namespaced = attributes.getValue(Namespaces.OWL.toString(), "ontologyIRI");
    // Fall back to the plain attribute name only when the namespaced lookup found nothing.
    String value = namespaced != null ? namespaced : attributes.getValue("ontologyIRI");
    return value == null ? null : IRI.create(value);
}

/**
 * Extracts an ontology IRI from the {@code base} attribute in the XML namespace (i.e.
 * {@code xml:base}) of an element.
 *
 * @param attributes the attributes of the element being handled
 * @return the IRI built from the {@code xml:base} value, or {@code null} if the attribute is
 *         absent
 */
@Nullable
protected IRI baseIRI(Attributes attributes) {
    String xmlBase = attributes.getValue(Namespaces.XML.toString(), "base");
    return xmlBase == null ? null : IRI.create(xmlBase);
}

/**
Expand All @@ -125,8 +136,8 @@ protected File getDirectory() {
}

/**
* The mapper only examines files that have specified file extensions. This
* method returns the file extensions that cause a file to be examined.
* The mapper only examines files that have specified file extensions. This method returns the
* file extensions that cause a file to be examined.
*
* @return A {@code Set} of file extensions.
*/
Expand All @@ -135,10 +146,9 @@ public Set<String> getFileExtensions() {
}

/**
* Sets the extensions of files that are to be examined for ontological
* content. (By default the extensions are, owl, xml and rdf). Only files
* that have the specified extensions will be examined to see if they
* contain ontologies.
* Sets the extensions of files that are to be examined for ontological content. (By default the
* extensions are owl, xml, rdf, omn and ofn.) Only files that have the specified extensions will
* be examined to see if they contain ontologies.
*
* @param extensions the set of extensions
*/
Expand Down Expand Up @@ -196,6 +206,8 @@ private void processFile(File f) {
if (f.isHidden()) {
return;
}
// if pointed directly at a zip file, map it
parseIfExtensionSupported(f);
File[] files = f.listFiles();
if (files == null) {
return;
Expand All @@ -217,13 +229,24 @@ protected void parseIfExtensionSupported(File file) {
return;
}
String extension = name.substring(lastIndexOf);
if (".obo".equals(extension)) {
if (".zip".equalsIgnoreCase(extension) || ".jar".equalsIgnoreCase(extension)) {
try {
ZipIRIMapper mapper = new ZipIRIMapper(file, "jar:" + file.toURI() + "!/");
mapper.oboMappings().forEach(e -> oboFileMap.put(e.getKey(), e.getValue()));
mapper.iriMappings()
.forEach(e -> ontologyIRI2PhysicalURIMap.put(e.getKey(), e.getValue()));
} catch (IOException e) {
// if we can't parse a file, then we can't map it
LOGGER.debug("Exception reading file", e);
}

} else if (".obo".equalsIgnoreCase(extension)) {
oboFileMap.put(name, IRI.create(file));
} else if (".ofn".equals(extension)) {
} else if (".ofn".equalsIgnoreCase(extension)) {
parseFSSFile(file);
} else if (".omn".equals(extension)) {
} else if (".omn".equalsIgnoreCase(extension)) {
parseManchesterSyntaxFile(file);
} else if (fileExtensions.contains(extension)) {
} else if (fileExtensions.contains(extension.toLowerCase())) {
parseFile(file);
}
}
Expand Down Expand Up @@ -302,8 +325,7 @@ private IRI parseManLine(File file, String line) {

@Override
public void startElement(@Nullable String uri, @Nullable String localName,
@Nullable String qName,
@Nullable Attributes attributes) throws SAXException {
@Nullable String qName, @Nullable Attributes attributes) throws SAXException {
OntologyRootElementHandler handler = handlerMap.get(uri + localName);
if (handler != null) {
IRI ontologyIRI = handler.handle(checkNotNull(attributes));
Expand All @@ -326,15 +348,13 @@ protected void addMapping(IRI ontologyIRI, File file) {
public String toString() {
StringBuilder sb = new StringBuilder("AutoIRIMapper: (");
sb.append(ontologyIRI2PhysicalURIMap.size()).append(" ontologies)\n");
ontologyIRI2PhysicalURIMap
.forEach((k, v) -> sb.append(" ").append(k.toQuotedString()).append(" -> ").append(
v).append('\n'));
ontologyIRI2PhysicalURIMap.forEach((k, v) -> sb.append(" ").append(k.toQuotedString())
.append(" -> ").append(v).append('\n'));
return sb.toString();
}

/**
* A simple interface which extracts an ontology IRI from a set of element
* attributes.
* A simple interface which extracts an ontology IRI from a set of element attributes.
*/
@FunctionalInterface
private interface OntologyRootElementHandler extends Serializable {
Expand Down
Loading

0 comments on commit 0c677ae

Please sign in to comment.