Skip to content

Commit

Permalink
fix (core): Improve Tika Test Coverage by comparing with actual full TTL
Browse files Browse the repository at this point in the history
  • Loading branch information
vorburger committed Sep 25, 2024
1 parent 29e8d0e commit 0b33057
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 70 deletions.
1 change: 1 addition & 0 deletions java/dev/enola/format/tika/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ junit_tests(
":tika",
"//java/dev/enola/common/io",
"//java/dev/enola/thing:thing_java",
"//java/dev/enola/thing/testlib",
"//test",
],
)
3 changes: 2 additions & 1 deletion java/dev/enola/format/tika/TikaThingConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ public boolean convertInto(ReadableResource resource, ThingsBuilder thingsBuilde
parser.parse(is, handler, metadata, parseContext);
var thing = thingsBuilder.getBuilder(resource.uri().toString());
convertMetadata(metadata, thing);
thing.set("https://enola.dev/content-as-text", sw.toString());
var text = sw.toString().trim();
if (!text.isEmpty()) thing.set("https://enola.dev/content-as-text", text);
return true;

} catch (TikaException | SAXException e) {
Expand Down
8 changes: 7 additions & 1 deletion java/dev/enola/format/tika/TikaThingConverterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import dev.enola.common.io.resource.EmptyResource;
import dev.enola.thing.Thing;
import dev.enola.thing.repo.ThingsBuilder;
import dev.enola.thing.testlib.ThingsSubject;

import org.junit.Test;

Expand Down Expand Up @@ -58,14 +59,19 @@ public void png() throws IOException {
// TODO @Test public void epubEBook() throws IOException {

private void check(String classpath) throws IOException {
var name = "classpath:/" + classpath;

var tb = new ThingsBuilder();
var c = new TikaThingConverter(new ClasspathResource.Provider());

var r = c.convertInto(URI.create("classpath:/" + classpath), tb);
var r = c.convertInto(URI.create(name), tb);
assertThat(r).isTrue();

assertThat(tb.builders()).hasSize(1);
var thing = tb.builders().iterator().next().build();
checkThatAllPredicatesAreAbsoluteURIs(thing);

ThingsSubject.assertThat(tb).isEqualTo(name + ".ttl");
}

private void checkThatAllPredicatesAreAbsoluteURIs(Thing thing) {
Expand Down
1 change: 1 addition & 0 deletions java/dev/enola/thing/testlib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ java_library(
visibility = ["//:__subpackages__"],
deps = [
"//java/dev/enola/common/io",
"//java/dev/enola/common/io/testlib",
"//java/dev/enola/rdf/io",
"//java/dev/enola/thing:thing_java",
"@maven//:com_google_errorprone_error_prone_annotations",
Expand Down
54 changes: 0 additions & 54 deletions java/dev/enola/thing/testlib/ModelSubject.java

This file was deleted.

19 changes: 15 additions & 4 deletions java/dev/enola/thing/testlib/ThingsSubject.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,22 @@
import com.google.common.truth.Subject;

import dev.enola.common.io.resource.ClasspathResource;
import dev.enola.common.io.resource.MemoryResource;
import dev.enola.common.io.resource.ResourceProvider;
import dev.enola.common.io.testlib.ResourceSubject;
import dev.enola.rdf.io.JavaThingRdfConverter;
import dev.enola.rdf.io.RdfReaderConverter;
import dev.enola.rdf.io.RdfWriterConverter;
import dev.enola.thing.repo.ThingRepository;

import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.impl.DynamicModel;
import org.eclipse.rdf4j.model.impl.LinkedHashModelFactory;
import org.eclipse.rdf4j.model.util.Models;

final class ThingsSubject extends Subject {
import java.io.IOException;

public final class ThingsSubject extends Subject {

// TODO add assertThat(Thing actual) - with a SingleThingRepository ?

Expand All @@ -50,17 +56,22 @@ public static Factory<ThingsSubject, ThingRepository> resources() {
private final Model actualModel;
private final ResourceProvider rp = new ClasspathResource.Provider();
private final RdfReaderConverter rdfReaderConverter = new RdfReaderConverter(rp);
private final JavaThingRdfConverter javaThingRdfConverter = new JavaThingRdfConverter();
private final RdfWriterConverter rdfWriterConverter = new RdfWriterConverter();

public ThingsSubject(FailureMetadata metadata, ThingRepository actual) {
super(metadata, actual);
JavaThingRdfConverter javaThingRdfConverter = new JavaThingRdfConverter();
actualModel = javaThingRdfConverter.convert(Streams.stream(actual.list()));
}

public void isEqualTo(String classpathResourcePath) {
public void isEqualTo(String classpathResourcePath) throws IOException {
var resource = rp.getReadableResource(classpathResourcePath);
if (resource == null) throw new IllegalArgumentException(classpathResourcePath);
var expectedModel = rdfReaderConverter.convert(resource).orElse(EMPTY_MODEL);
ModelSubject.assertThat(actualModel).isEqualTo(expectedModel);
if (!Models.isomorphic(actualModel, expectedModel)) {
var actualResource = new MemoryResource(resource.mediaType());
rdfWriterConverter.convertInto(actualModel, actualResource);
ResourceSubject.assertThat(actualResource).containsCharsOf(resource);
}
}
}
6 changes: 4 additions & 2 deletions java/dev/enola/thing/testlib/ThingsSubjectTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,18 @@

import org.junit.Test;

import java.io.IOException;

public class ThingsSubjectTest {

@Test
public void empty() {
public void empty() throws IOException {
ThingsRepository r = new ThingsBuilder();
ThingsSubject.assertThat(r).isEqualTo("classpath:/empty.yaml");
}

@Test
public void ttl() {
public void ttl() throws IOException {
ThingsBuilder r = new ThingsBuilder();
r.getBuilder("https://example.org/greeting1")
.set("https://example.org/message", "hello, world");
Expand Down
3 changes: 1 addition & 2 deletions test-cli.bash
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ set -euox pipefail
# This script tests Enola CLI invocations.
# See also EnolaCLITest

./enola get --load test/test.html enola:/inline
./enola -v rosetta --in test/test.html --out="fd:2?mediaType=text/turtle"
# ...

# PS: Update tika.md with anything (of interest) added here
7 changes: 1 addition & 6 deletions test/test.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@
<meta name="author" content="The Enola <https://enola.dev> Authors" />
<meta name="description" content="This HTML is used to test e.g. the TikaThingConverter." />
</head>
<body>
<p>hello, world</p>

<a href="test.html">Recursive Self Link</a>

<a href="https://www.vorburger.ch">Vorburger Link</a>
<body><p>hello, world</p><a href="test.html">Recursive Self Link</a><a href="https://www.vorburger.ch">Vorburger</a>
</body>
</html>
8 changes: 8 additions & 0 deletions test/test.html.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@base <memory:1> .

<classpath:/test.html> <http://purl.org/dc/elements/1.1/title> "Test HTML Title";
<http://purl.org/dc/elements/1.1/language> "en";
<http://purl.org/dc/elements/1.1/description> "This HTML is used to test e.g. the TikaThingConverter.";
<https://enola.dev/mediaType> "text/html; charset=UTF-8";
<http://purl.org/dc/elements/1.1/creator> "The Enola <https://enola.dev> Authors";
<https://enola.dev/content-as-text> "hello, world\nRecursive Self LinkVorburger".
23 changes: 23 additions & 0 deletions test/test.png.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<classpath:/test.png> <https://enola.dev/tika/Chroma%20ColorSpaceType> "RGB";
<https://enola.dev/tika/Compression%20Lossless> "true";
<https://enola.dev/tika/Data%20SampleFormat> "UnsignedIntegral";
<https://enola.dev/tika/Chroma%20NumChannels> "4";
<https://enola.dev/tika/Transparency%20Alpha> "nonpremultipled";
<https://enola.dev/tika/IHDR> "width=32, height=32, bitDepth=16, colorType=RGBAlpha, compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none";
<https://enola.dev/tika/imagereader:NumImages> "1";
<https://enola.dev/tika/Dimension%20PixelAspectRatio> "1.0";
<https://enola.dev/tika/Dimension%20ImageOrientation> "Normal";
<https://enola.dev/tika/Data%20PlanarConfiguration> "PixelInterleaved";
<https://enola.dev/tika/width> "32";
<https://enola.dev/tika/Chroma%20Gamma> "1.0";
<https://enola.dev/tika/gAMA> "100000";
<https://enola.dev/mediaType> "image/png";
<https://enola.dev/tika/Compression%20NumProgressiveScans> "1";
<https://enola.dev/tika/Content-Type-Parser-Override> "image/ocr-png";
<https://enola.dev/tika/Data%20BitsPerSample> "16 16 16 16";
<http://ns.adobe.com/tiff/1.0/ImageWidth> "32";
<http://ns.adobe.com/tiff/1.0/ImageLength> "32";
<https://enola.dev/tika/height> "32";
<https://enola.dev/tika/Chroma%20BlackIsZero> "true";
<https://enola.dev/tika/Compression%20CompressionTypeName> "deflate";
<http://ns.adobe.com/tiff/1.0/BitsPerSample> "16 16 16 16".

0 comments on commit 0b33057

Please sign in to comment.