diff --git a/metafacture-html/src/main/java/org/metafacture/html/ScriptExtractor.java b/metafacture-html/src/main/java/org/metafacture/html/ElementExtractor.java
similarity index 69%
rename from metafacture-html/src/main/java/org/metafacture/html/ScriptExtractor.java
rename to metafacture-html/src/main/java/org/metafacture/html/ElementExtractor.java
index fdcecbfcb..19acdc868 100644
--- a/metafacture-html/src/main/java/org/metafacture/html/ScriptExtractor.java
+++ b/metafacture-html/src/main/java/org/metafacture/html/ElementExtractor.java
@@ -30,21 +30,30 @@
import org.metafacture.framework.helpers.DefaultObjectPipe;
/**
- * Extracts the first script from an HTML document
+ * Extracts the first element matching the specified selector from an HTML document
*
* @author Fabian Steeg
*/
-@Description("Extracts the first script from an HTML document")
+@Description("Extracts the specified element from an HTML document")
@In(Reader.class)
@Out(String.class)
-@FluxCommand("extract-script")
-public class ScriptExtractor extends DefaultObjectPipe> {
+@FluxCommand("extract-element")
+public class ElementExtractor extends DefaultObjectPipe> {
+ private String selector;
+
+ /**
+ * @param selector The CSS-style jsoup selector, see https://jsoup.org/cookbook/extracting-data/selector-syntax
+ */
+ public ElementExtractor(final String selector) {
+ this.selector = selector;
+ }
+
@Override
public void process(final Reader reader) {
try {
Document document = Jsoup.parse(IOUtils.toString(reader));
- Element firstScript = document.select("script").first();
- getReceiver().process(firstScript.data());
+ Element firstElement = document.select(selector).first();
+ getReceiver().process(firstElement.data());
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metafacture-html/src/main/resources/flux-commands.properties b/metafacture-html/src/main/resources/flux-commands.properties
index 95f4e031c..e6046cd3f 100644
--- a/metafacture-html/src/main/resources/flux-commands.properties
+++ b/metafacture-html/src/main/resources/flux-commands.properties
@@ -14,4 +14,4 @@
# limitations under the License.
#
decode-html org.metafacture.html.HtmlDecoder
-extract-script org.metafacture.html.ScriptExtractor
+extract-element org.metafacture.html.ElementExtractor
diff --git a/metafacture-html/src/test/java/org/metafacture/html/ScriptExtractorTest.java b/metafacture-html/src/test/java/org/metafacture/html/ElementExtractorTest.java
similarity index 74%
rename from metafacture-html/src/test/java/org/metafacture/html/ScriptExtractorTest.java
rename to metafacture-html/src/test/java/org/metafacture/html/ElementExtractorTest.java
index 6045fad7a..860af9f34 100644
--- a/metafacture-html/src/test/java/org/metafacture/html/ScriptExtractorTest.java
+++ b/metafacture-html/src/test/java/org/metafacture/html/ElementExtractorTest.java
@@ -28,17 +28,20 @@
import org.mockito.MockitoAnnotations;
/**
- * Tests for {@link ScriptExtractor}.
+ * Tests for {@link ElementExtractor}.
*
* @author Fabian Steeg
*
*/
-public final class ScriptExtractorTest {
+public final class ElementExtractorTest {
- private static final StringReader IN = new StringReader(""
+ + "