diff --git a/metafacture-html/src/main/java/org/metafacture/html/HtmlReader.java b/metafacture-html/src/main/java/org/metafacture/html/HtmlReader.java deleted file mode 100644 index 0129a5976..000000000 --- a/metafacture-html/src/main/java/org/metafacture/html/HtmlReader.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2020 Fabian Steeg, hbz - * - * Licensed under the Apache License, Version 2.0 the "License"; - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.metafacture.html; - -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; - -import org.apache.commons.io.IOUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.metafacture.framework.FluxCommand; -import org.metafacture.framework.ObjectReceiver; -import org.metafacture.framework.annotations.Description; -import org.metafacture.framework.annotations.In; -import org.metafacture.framework.annotations.Out; -import org.metafacture.framework.helpers.DefaultObjectPipe; - -/** - * Parses HTML to X(HT)ML - * - * @author Fabian Steeg - */ -@Description("Parses HTML to X(HT)ML") -@In(Reader.class) -@Out(Reader.class) -@FluxCommand("html-to-xml") -public class HtmlReader extends DefaultObjectPipe> { - @Override - public void process(final Reader reader) { - try { - Document document = Jsoup.parse(IOUtils.toString(reader)); - document.outputSettings().prettyPrint(false).syntax(Document.OutputSettings.Syntax.xml); - getReceiver().process(new StringReader(document.html())); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/metafacture-html/src/main/resources/flux-commands.properties b/metafacture-html/src/main/resources/flux-commands.properties index cfab0be69..95f4e031c 100644 --- a/metafacture-html/src/main/resources/flux-commands.properties +++ b/metafacture-html/src/main/resources/flux-commands.properties @@ -13,6 +13,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # -html-to-xml org.metafacture.html.HtmlReader decode-html org.metafacture.html.HtmlDecoder extract-script org.metafacture.html.ScriptExtractor diff --git a/metafacture-html/src/test/java/org/metafacture/html/HtmlReaderTest.java b/metafacture-html/src/test/java/org/metafacture/html/HtmlReaderTest.java deleted file mode 100644 index ec5f0eb76..000000000 --- a/metafacture-html/src/test/java/org/metafacture/html/HtmlReaderTest.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2020 Fabian Steeg, hbz - * - * Licensed under the Apache License, Version 2.0 the "License"; - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.metafacture.html; - -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; - -import java.io.Reader; -import java.io.StringReader; - -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; -import org.metafacture.framework.ObjectReceiver; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; - -/** - * Tests for {@link HtmlReader}. - * - * @author Fabian Steeg - * - */ -public final class HtmlReaderTest { - - private static final StringReader IN = new StringReader("hi"); - private static final StringReader OUT = new StringReader("hi"); - - private HtmlReader htmlReader; - - @Mock - private ObjectReceiver receiver; - - @Before - public void setup() { - MockitoAnnotations.initMocks(this); - htmlReader = new HtmlReader(); - htmlReader.setReceiver(receiver); - } - - @Test - @Ignore - public void testShouldProcessRecordsFollowedbySeparator() { - htmlReader.process(IN); - verify(receiver).process(OUT); - verifyNoMoreInteractions(receiver); - } - - @After - public void cleanup() { - htmlReader.closeStream(); - } -}