From 948a06a0462d7515d7bf4265678b7822d98e9312 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 27 Apr 2020 16:21:35 +0200 Subject: [PATCH] Test runner See #1058. --- .../run/CulturegraphXmlFilterHbzToJson.java | 23 +++++-- .../CulturegraphXmlFilterHbzToJsonTest.java | 66 ++++--------------- src/test/resources/jsonld-cg/bulk.ndjson | 2 +- 3 files changed, 31 insertions(+), 60 deletions(-) diff --git a/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java b/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java index 1693c141c..90de28d49 100644 --- a/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java +++ b/src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzToJson.java @@ -25,18 +25,26 @@ **/ @SuppressWarnings("javadoc") public final class CulturegraphXmlFilterHbzToJson { - private static final String JSON_FILE = "bulk.ndjson"; + private static final String ELASTICSEARCH_INDEX_NAME = "cg"; + private static String JSON_FILE="bulk.ndjson"; + private static final String XML_SPLITTER_ELEMENT = "record"; + private static final String XML_SPLITTER_TOP_ELEMENT = "marc:collection"; public static void main(String... args) { + String XML_INPUT_FILE =new File(args[0]).getAbsolutePath(); + + if (args.length >1) JSON_FILE=args[1]; + final FileOpener opener = new FileOpener(); opener.setReceiver(new XmlDecoder()) - .setReceiver( - new XmlElementSplitter("marc:collection", "record")) // + .setReceiver(new XmlElementSplitter(XML_SPLITTER_TOP_ELEMENT, + XML_SPLITTER_ELEMENT)) // .setReceiver(new LiteralToObject()) .setReceiver(new ObjectThreader())// .addReceiver(receiverThread()); // one thread for it's working - // on one file atm - opener.process(new File(args[0]).getAbsolutePath()); + // on one file + opener.process( + new File(XML_INPUT_FILE).getAbsolutePath()); try { opener.closeStream(); } catch (final NullPointerException e) { @@ -47,12 +55,13 @@ public static void main(String... 
args) { private static StringReader receiverThread() { final StringReader sr = new StringReader(); sr.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler()) - .setReceiver(new Filter( + .setReceiver(new Filter( // prevents empty records new Metamorph("src/main/resources/morph-cg-to-es.xml"))) .setReceiver( new Metamorph("src/main/resources/morph-cg-to-es.xml")) .setReceiver(new JsonEncoder()) - .setReceiver(new JsonToElasticsearchBulk("rvk", "cg")) + .setReceiver(new JsonToElasticsearchBulk("rvk", + ELASTICSEARCH_INDEX_NAME)) .setReceiver(new ObjectWriter<>(JSON_FILE)); return sr; } diff --git a/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java b/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java index 458cdf31c..d5c033d0f 100644 --- a/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java +++ b/src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzToJsonTest.java @@ -30,18 +30,7 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import org.metafacture.biblio.marc21.MarcXmlHandler; -import org.metafacture.elasticsearch.JsonToElasticsearchBulk; -import org.metafacture.flowcontrol.ObjectThreader; -import org.metafacture.io.FileOpener; -import org.metafacture.io.ObjectWriter; -import org.metafacture.json.JsonEncoder; -import org.metafacture.mangling.LiteralToObject; -import org.metafacture.metamorph.Filter; -import org.metafacture.metamorph.Metamorph; -import org.metafacture.strings.StringReader; -import org.metafacture.xml.XmlDecoder; -import org.metafacture.xml.XmlElementSplitter; +import org.lobid.resources.run.CulturegraphXmlFilterHbzToJson; /** * Test of filtering resources with hbz holdings from culturegraph marcxml, @@ -54,25 +43,23 @@ public final class CulturegraphXmlFilterHbzToJsonTest { private static final Logger LOG = - LogManager.getLogger(CulturegraphXmlFilterHbzToJsonTest.class); + 
LogManager.getLogger(CulturegraphXmlFilterHbzToJsonTest.class); private static final String PATH_TO_TEST = "src/test/resources/"; private static final String JSON_OUTPUT_FILE = - PATH_TO_TEST + "jsonld-cg/bulk.ndjson"; - private static final String XML_INPUT_FILE = - "/aggregate_auslieferung_20191212.small.marcxml"; - - private static final String XML_SPLITTER_ELEMENT = "record"; - private static final String XML_SPLITTER_TOP_ELEMENT = "marc:collection"; + PATH_TO_TEST + "jsonld-cg/bulk.ndjson"; + private static final String XML_INPUT_FILE = + "/aggregate_auslieferung_20191212.small.marcxml"; private static PluginConfigurableNode node; private static Client client; private static final int ELASTICSEARCH_HTTP_PORT = 19200; - private static final String ELASTICSEARCH_INDEX_NAME = "cg"; + private static final String ELASTICSEARCH_BULK_URI = - "http://localhost:" + ELASTICSEARCH_HTTP_PORT + "/_bulk"; + "http://localhost:" + ELASTICSEARCH_HTTP_PORT + "/_bulk"; private static final String ELASTICSEARCH_TEST_NODE_NAME = "testNodeCgRvk"; - + // classToTest = new CulturegraphXmlFilterHbzToJson(); + private static final Collection> plugins = Arrays.asList(Netty4Plugin.class); @@ -93,7 +80,8 @@ public static void setup() { } node = new PluginConfigurableNode(Settings.builder() - .put(Node.NODE_NAME_SETTING.getKey(), ELASTICSEARCH_TEST_NODE_NAME) + .put(Node.NODE_NAME_SETTING.getKey(), + ELASTICSEARCH_TEST_NODE_NAME) .put(NetworkModule.TRANSPORT_TYPE_KEY, NetworkModule.LOCAL_TRANSPORT) .put("http.enabled", "true").put("path.home", "tmp") @@ -122,35 +110,8 @@ public static void setup() { * Extract and transform */ private static void etl() { - final FileOpener opener = new FileOpener(); - opener.setReceiver(new XmlDecoder()) - .setReceiver(new XmlElementSplitter(XML_SPLITTER_TOP_ELEMENT, - XML_SPLITTER_ELEMENT)) // - .setReceiver(new LiteralToObject()) - .setReceiver(new ObjectThreader())// - .addReceiver(receiverThread()); // one thread for it's working - // on one file - 
opener.process( - new File(PATH_TO_TEST + XML_INPUT_FILE).getAbsolutePath()); - try { - opener.closeStream(); - } catch (final NullPointerException e) { - // ignore, see https://github.com/hbz/lobid-resources/issues/1030 - } - } - - private static StringReader receiverThread() { - final StringReader sr = new StringReader(); - sr.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler()) - .setReceiver(new Filter( // prevents empty records - new Metamorph("src/main/resources/morph-cg-to-es.xml"))) - .setReceiver( - new Metamorph("src/main/resources/morph-cg-to-es.xml")) - .setReceiver(new JsonEncoder()) - .setReceiver(new JsonToElasticsearchBulk("rvk", - ELASTICSEARCH_INDEX_NAME)) - .setReceiver(new ObjectWriter<>(JSON_OUTPUT_FILE)); - return sr; + CulturegraphXmlFilterHbzToJson.main(PATH_TO_TEST + XML_INPUT_FILE, + JSON_OUTPUT_FILE); } @SuppressWarnings("static-method") @@ -163,6 +124,7 @@ public void testIngestJsonBulkIntoElasticsearch() { } } + // TODO: create and use metafacture module "http-writer" private static void ingest() throws IOException { File jsonFile = new File(JSON_OUTPUT_FILE); HttpEntity entity = new FileEntity(jsonFile); diff --git a/src/test/resources/jsonld-cg/bulk.ndjson b/src/test/resources/jsonld-cg/bulk.ndjson index 731665d8e..ecd6b901a 100644 --- a/src/test/resources/jsonld-cg/bulk.ndjson +++ b/src/test/resources/jsonld-cg/bulk.ndjson @@ -1,2 +1,2 @@ -{"index":{"_index":"cg","_type":"rvk","_id":null}} +{"index":{"_index":"cg","_type":"rvk"}} {"rvk":["CI 1100","5,1"],"hbzId":"HT013166356, HT018625006"}