From 0b330577c510d54047e810fe6a8058d3395f5948 Mon Sep 17 00:00:00 2001 From: Michael Vorburger Date: Wed, 25 Sep 2024 02:14:55 +0200 Subject: [PATCH] fix (core): Improve Tika Test Coverage by comparing with actual full TTL --- java/dev/enola/format/tika/BUILD | 1 + .../enola/format/tika/TikaThingConverter.java | 3 +- .../format/tika/TikaThingConverterTest.java | 8 ++- java/dev/enola/thing/testlib/BUILD | 1 + .../dev/enola/thing/testlib/ModelSubject.java | 54 ------------------- .../enola/thing/testlib/ThingsSubject.java | 19 +++++-- .../thing/testlib/ThingsSubjectTest.java | 6 ++- test-cli.bash | 3 +- test/test.html | 7 +-- test/test.html.ttl | 8 +++ test/test.png.ttl | 23 ++++++++ 11 files changed, 63 insertions(+), 70 deletions(-) delete mode 100644 java/dev/enola/thing/testlib/ModelSubject.java create mode 100644 test/test.html.ttl create mode 100644 test/test.png.ttl diff --git a/java/dev/enola/format/tika/BUILD b/java/dev/enola/format/tika/BUILD index f8e9aff8..6af90346 100644 --- a/java/dev/enola/format/tika/BUILD +++ b/java/dev/enola/format/tika/BUILD @@ -49,6 +49,7 @@ junit_tests( ":tika", "//java/dev/enola/common/io", "//java/dev/enola/thing:thing_java", + "//java/dev/enola/thing/testlib", "//test", ], ) diff --git a/java/dev/enola/format/tika/TikaThingConverter.java b/java/dev/enola/format/tika/TikaThingConverter.java index 8860a0cb..d25dd942 100644 --- a/java/dev/enola/format/tika/TikaThingConverter.java +++ b/java/dev/enola/format/tika/TikaThingConverter.java @@ -80,7 +80,8 @@ public boolean convertInto(ReadableResource resource, ThingsBuilder thingsBuilde parser.parse(is, handler, metadata, parseContext); var thing = thingsBuilder.getBuilder(resource.uri().toString()); convertMetadata(metadata, thing); - thing.set("https://enola.dev/content-as-text", sw.toString()); + var text = sw.toString().trim(); + if (!text.isEmpty()) thing.set("https://enola.dev/content-as-text", text); return true; } catch (TikaException | SAXException e) { diff --git a/java/dev/enola/format/tika/TikaThingConverterTest.java b/java/dev/enola/format/tika/TikaThingConverterTest.java index e4614801..cb656635 100644 --- a/java/dev/enola/format/tika/TikaThingConverterTest.java +++ b/java/dev/enola/format/tika/TikaThingConverterTest.java @@ -23,6 +23,7 @@ import dev.enola.common.io.resource.EmptyResource; import dev.enola.thing.Thing; import dev.enola.thing.repo.ThingsBuilder; +import dev.enola.thing.testlib.ThingsSubject; import org.junit.Test; @@ -58,14 +59,19 @@ public void png() throws IOException { // TODO @Test public void epubEBook() throws IOException { private void check(String classpath) throws IOException { + var name = "classpath:/" + classpath; + var tb = new ThingsBuilder(); var c = new TikaThingConverter(new ClasspathResource.Provider()); - var r = c.convertInto(URI.create("classpath:/" + classpath), tb); + var r = c.convertInto(URI.create(name), tb); assertThat(r).isTrue(); + assertThat(tb.builders()).hasSize(1); var thing = tb.builders().iterator().next().build(); checkThatAllPredicatesAreAbsoluteURIs(thing); + + ThingsSubject.assertThat(tb).isEqualTo(name + ".ttl"); } private void checkThatAllPredicatesAreAbsoluteURIs(Thing thing) { diff --git a/java/dev/enola/thing/testlib/BUILD b/java/dev/enola/thing/testlib/BUILD index 4acf4a3c..e49382b7 100644 --- a/java/dev/enola/thing/testlib/BUILD +++ b/java/dev/enola/thing/testlib/BUILD @@ -26,6 +26,7 @@ java_library( visibility = ["//:__subpackages__"], deps = [ "//java/dev/enola/common/io", + "//java/dev/enola/common/io/testlib", "//java/dev/enola/rdf/io", "//java/dev/enola/thing:thing_java", "@maven//:com_google_errorprone_error_prone_annotations", diff --git a/java/dev/enola/thing/testlib/ModelSubject.java b/java/dev/enola/thing/testlib/ModelSubject.java deleted file mode 100644 index 64f2b3d7..00000000 --- a/java/dev/enola/thing/testlib/ModelSubject.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * Copyright 2024 The Enola Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package dev.enola.thing.testlib; - -import static com.google.common.truth.Truth.assertAbout; - -import com.google.common.truth.FailureMetadata; -import com.google.common.truth.Subject; -import com.google.common.truth.Truth; - -import org.eclipse.rdf4j.model.Model; -import org.eclipse.rdf4j.model.util.Models; - -final class ModelSubject extends Subject { - - // TODO Remove this copy/paste duplicate of rdf.io.ModelSubject, once that's in a testlib - - public static ModelSubject assertThat(Model actual) { - return assertAbout(resources()).that(actual); - } - - public static Factory resources() { - return ModelSubject::new; - } - - private final Model actual; - - public ModelSubject(FailureMetadata metadata, Model actual) { - super(metadata, actual); - this.actual = actual; - } - - public void isEqualTo(Model expected) { - if (!Models.isomorphic(actual, expected)) { - // TODO Canonicalizer-like sorting of Statements by IRI - Truth.assertThat(actual).isEqualTo(expected); - } - } -} diff --git a/java/dev/enola/thing/testlib/ThingsSubject.java b/java/dev/enola/thing/testlib/ThingsSubject.java index 942b927e..acabb8f8 100644 --- a/java/dev/enola/thing/testlib/ThingsSubject.java +++ b/java/dev/enola/thing/testlib/ThingsSubject.java @@ -24,16 +24,22 @@ import com.google.common.truth.Subject; import dev.enola.common.io.resource.ClasspathResource; +import dev.enola.common.io.resource.MemoryResource; import dev.enola.common.io.resource.ResourceProvider; +import dev.enola.common.io.testlib.ResourceSubject; import dev.enola.rdf.io.JavaThingRdfConverter; import dev.enola.rdf.io.RdfReaderConverter; +import dev.enola.rdf.io.RdfWriterConverter; import dev.enola.thing.repo.ThingRepository; import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.impl.DynamicModel; import org.eclipse.rdf4j.model.impl.LinkedHashModelFactory; +import org.eclipse.rdf4j.model.util.Models; -final class ThingsSubject extends Subject { +import java.io.IOException; + +public final class ThingsSubject extends Subject { // TODO add assertThat(Thing actual) - with a SingleThingRepository ? @@ -50,17 +56,22 @@ public static Factory resources() { private final Model actualModel; private final ResourceProvider rp = new ClasspathResource.Provider(); private final RdfReaderConverter rdfReaderConverter = new RdfReaderConverter(rp); - private final JavaThingRdfConverter javaThingRdfConverter = new JavaThingRdfConverter(); + private final RdfWriterConverter rdfWriterConverter = new RdfWriterConverter(); public ThingsSubject(FailureMetadata metadata, ThingRepository actual) { super(metadata, actual); + JavaThingRdfConverter javaThingRdfConverter = new JavaThingRdfConverter(); actualModel = javaThingRdfConverter.convert(Streams.stream(actual.list())); } - public void isEqualTo(String classpathResourcePath) { + public void isEqualTo(String classpathResourcePath) throws IOException { var resource = rp.getReadableResource(classpathResourcePath); if (resource == null) throw new IllegalArgumentException(classpathResourcePath); var expectedModel = rdfReaderConverter.convert(resource).orElse(EMPTY_MODEL); - ModelSubject.assertThat(actualModel).isEqualTo(expectedModel); + if (!Models.isomorphic(actualModel, expectedModel)) { + var actualResource = new MemoryResource(resource.mediaType()); + rdfWriterConverter.convertInto(actualModel, actualResource); + ResourceSubject.assertThat(actualResource).containsCharsOf(resource); + } } } diff --git a/java/dev/enola/thing/testlib/ThingsSubjectTest.java b/java/dev/enola/thing/testlib/ThingsSubjectTest.java index 36925407..31ca174a 100644 --- a/java/dev/enola/thing/testlib/ThingsSubjectTest.java +++ b/java/dev/enola/thing/testlib/ThingsSubjectTest.java @@ -22,16 +22,18 @@ import org.junit.Test; +import java.io.IOException; + public class ThingsSubjectTest { @Test - public void empty() { + public void empty() throws IOException { ThingsRepository r = new ThingsBuilder(); ThingsSubject.assertThat(r).isEqualTo("classpath:/empty.yaml"); } @Test - public void ttl() { + public void ttl() throws IOException { ThingsBuilder r = new ThingsBuilder(); r.getBuilder("https://example.org/greeting1") .set("https://example.org/message", "hello, world"); diff --git a/test-cli.bash b/test-cli.bash index 74019e73..12ca3638 100755 --- a/test-cli.bash +++ b/test-cli.bash @@ -20,7 +20,6 @@ set -euox pipefail # This script tests Enola CLI invocations. # See also EnolaCLITest -./enola get --load test/test.html enola:/inline -./enola -v rosetta --in test/test.html --out="fd:2?mediaType=text/turtle" +# ... # PS: Update tika.md with anything (of interest) added here diff --git a/test/test.html b/test/test.html index be76cba4..c3a18cc8 100644 --- a/test/test.html +++ b/test/test.html @@ -6,11 +6,6 @@ - -

hello, world

- - Recursive Self Link - - Vorburger Link +

hello, world

Recursive Self LinkVorburger diff --git a/test/test.html.ttl b/test/test.html.ttl new file mode 100644 index 00000000..21136121 --- /dev/null +++ b/test/test.html.ttl @@ -0,0 +1,8 @@ +@base . + + "Test HTML Title"; + "en"; + "This HTML is used to test e.g. the TikaThingConverter."; + "text/html; charset=UTF-8"; + "The Enola Authors"; + "hello, world\nRecursive Self LinkVorburger". diff --git a/test/test.png.ttl b/test/test.png.ttl new file mode 100644 index 00000000..9bcdd044 --- /dev/null +++ b/test/test.png.ttl @@ -0,0 +1,23 @@ + "RGB"; + "true"; + "UnsignedIntegral"; + "4"; + "nonpremultipled"; + "width=32, height=32, bitDepth=16, colorType=RGBAlpha, compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none"; + "1"; + "1.0"; + "Normal"; + "PixelInterleaved"; + "32"; + "1.0"; + "100000"; + "image/png"; + "1"; + "image/ocr-png"; + "16 16 16 16"; + "32"; + "32"; + "32"; + "true"; + "deflate"; + "16 16 16 16".