From ba81e9f71aa77975898b6884341a8beb3a4dbc49 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Mon, 7 Sep 2020 11:01:13 +0200 Subject: [PATCH 1/2] Make MARCXML namespace for record elements configurable. Not all MARCXML sources specify the required namespace URI (notably, Alma General Publishing). By allowing the expected namespace to be configured, or even skipped (by setting it to `null`), this will resolve #330. The record element is the only element that is restricted by namespace; however, the original commit [1] doesn't provide any context as to why the change was introduced. [1] https://sourceforge.net/p/culturegraph/code/1507/ --- .../biblio/marc21/MarcXmlHandler.java | 13 +++++- .../biblio/marc21/MarcXmlHandlerTest.java | 45 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java index d3fb82959..6f1264140 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java @@ -45,8 +45,17 @@ public final class MarcXmlHandler extends DefaultXmlPipe { private static final String LEADER = "leader"; private static final String TYPE = "type"; private String currentTag = ""; + private String namespace = NAMESPACE; private StringBuilder builder = new StringBuilder(); + public void setNamespace(final String namespace) { + this.namespace = namespace; + } + + private boolean checkNamespace(final String uri) { + return namespace == null || namespace.equals(uri); + } + @Override public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException { @@ -58,7 +67,7 @@ public void startElement(final String uri, final String localName, final String }else if(CONTROLFIELD.equals(localName)){ builder = new StringBuilder(); currentTag = attributes.getValue("tag"); - }else if(RECORD.equals(localName) && NAMESPACE.equals(uri)){ + }else if(RECORD.equals(localName) && checkNamespace(uri)){ getReceiver().startRecord(""); getReceiver().literal(TYPE, attributes.getValue(TYPE)); }else if(LEADER.equals(localName)){ @@ -77,7 +86,7 @@ public void endElement(final String uri, final String localName, final String qN }else if(CONTROLFIELD.equals(localName)){ getReceiver().literal(currentTag, builder.toString().trim()); - }else if(RECORD.equals(localName) && NAMESPACE.equals(uri)){ + }else if(RECORD.equals(localName) && checkNamespace(uri)){ getReceiver().endRecord(); }else if(LEADER.equals(localName)){ diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java index af6409f8b..e74d38827 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java @@ -16,6 +16,7 @@ package org.metafacture.biblio.marc21; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; import org.junit.After; import org.junit.Before; @@ -37,6 +38,8 @@ public final class MarcXmlHandlerTest { private static final String LEADER = "leader"; private static final String CONTROLFIELD = "controlfield"; private static final String NAMESPACE = "http://www.loc.gov/MARC21/slim"; + private static final String RECORD = "record"; + private static final String TYPE = "type"; private MarcXmlHandler marcXmlHandler; @@ -84,4 +87,46 @@ public void issue233ShouldNotRemoveWhitespaceFromLeader() verify(receiver).literal("leader", leaderValue); } + @Test + public void shouldRecognizeRecordsWithNamespace() + throws SAXException { + final AttributesImpl attributes = new AttributesImpl(); + + marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes); + marcXmlHandler.endElement(NAMESPACE, RECORD, ""); + + verify(receiver).startRecord(""); + verify(receiver).literal(TYPE, null); + verify(receiver).endRecord(); + + verifyNoMoreInteractions(receiver); + } + + @Test + public void shouldNotRecognizeRecordsWithoutNamespace() + throws SAXException { + final AttributesImpl attributes = new AttributesImpl(); + + marcXmlHandler.startElement(null, RECORD, "", attributes); + marcXmlHandler.endElement(null, RECORD, ""); + + verifyNoMoreInteractions(receiver); + } + + @Test + public void issue330ShouldOptionallyRecognizeRecordsWithoutNamespace() + throws SAXException { + final AttributesImpl attributes = new AttributesImpl(); + + marcXmlHandler.setNamespace(null); + marcXmlHandler.startElement(null, RECORD, "", attributes); + marcXmlHandler.endElement(null, RECORD, ""); + + verify(receiver).startRecord(""); + verify(receiver).literal(TYPE, null); + verify(receiver).endRecord(); + + verifyNoMoreInteractions(receiver); + } + } From 8bf5bcb85e6e7921f99519b8d3e054680bcb4ce0 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Fri, 11 Sep 2020 10:39:20 +0200 Subject: [PATCH 2/2] Allow flushing collectors to only emit when complete. Without this option, a flushing collector (e.g. `EqualsFilter`) always emits its value when being flushed, regardless of whether it's already complete. Only applies to a subset of all flushing collectors since most of them are never "complete"; `All`, on the other hand, has this condition already built-in. --- .../api/helpers/AbstractFlushingCollect.java | 8 +- .../src/main/resources/schemata/metamorph.xsd | 6 ++ .../metamorph/collectors/CombineTest.java | 33 ++++++++ .../metamorph/collectors/EntityTest.java | 37 ++++++++- .../collectors/EqualsFilterTest.java | 82 +++++++++++++++++++ 5 files changed, 164 insertions(+), 2 deletions(-) diff --git a/metamorph-api/src/main/java/org/metafacture/metamorph/api/helpers/AbstractFlushingCollect.java b/metamorph-api/src/main/java/org/metafacture/metamorph/api/helpers/AbstractFlushingCollect.java index 72be7c9a6..1a555f9e7 100644 --- a/metamorph-api/src/main/java/org/metafacture/metamorph/api/helpers/AbstractFlushingCollect.java +++ b/metamorph-api/src/main/java/org/metafacture/metamorph/api/helpers/AbstractFlushingCollect.java @@ -24,10 +24,16 @@ */ public abstract class AbstractFlushingCollect extends AbstractCollect { + private boolean flushIncomplete = true; + + public final void setFlushIncomplete(final boolean flushIncomplete) { + this.flushIncomplete = flushIncomplete; + } + @Override public final void flush(final int recordCount, final int entityCount) { if (isSameRecord(recordCount) && sameEntityConstraintSatisfied(entityCount)) { - if(isConditionMet()) { + if(isConditionMet() && (flushIncomplete || isComplete())) { emit(); } if (getReset()) { diff --git a/metamorph/src/main/resources/schemata/metamorph.xsd b/metamorph/src/main/resources/schemata/metamorph.xsd index 9bbea22db..65ec1a0f2 100644 --- a/metamorph/src/main/resources/schemata/metamorph.xsd +++ b/metamorph/src/main/resources/schemata/metamorph.xsd @@ -283,6 +283,8 @@ + + + ") + .with(" ") + .with(" ") + .with(" ") + .with(" ") + .with("") + .createConnectedTo(receiver); + + metamorph.startRecord("1"); + metamorph.startEntity("e"); + metamorph.literal("l", "1"); + metamorph.endEntity(); + metamorph.startEntity("e"); + metamorph.literal("l", "2"); + metamorph.literal("m", "2"); + metamorph.endEntity(); + metamorph.startEntity("e"); + metamorph.literal("l", "3"); + metamorph.endEntity(); + metamorph.endRecord(); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord("1"); + ordered.verify(receiver).literal("combi", "22"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + verifyNoMoreInteractions(receiver); + } + @Test public void shouldPostprocessCombinedValue() { metamorph = InlineMorph.in(this) diff --git a/metamorph/src/test/java/org/metafacture/metamorph/collectors/EntityTest.java b/metamorph/src/test/java/org/metafacture/metamorph/collectors/EntityTest.java index 7d66d69b9..ceb291e04 100644 --- a/metamorph/src/test/java/org/metafacture/metamorph/collectors/EntityTest.java +++ b/metamorph/src/test/java/org/metafacture/metamorph/collectors/EntityTest.java @@ -17,6 +17,7 @@ import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verifyNoMoreInteractions; import org.junit.Rule; import org.junit.Test; @@ -119,6 +120,40 @@ public void shouldEmitEnityOnFlushEvent() { ordered.verifyNoMoreInteractions(); } + @Test + public void shouldNotEmitEnityOnFlushEventIfIncomplete() { + metamorph = InlineMorph.in(this) + .with("") + .with(" ") + .with(" ") + .with(" ") + .with(" ") + .with("") + .createConnectedTo(receiver); + + metamorph.startRecord("1"); + metamorph.literal("d1", "a"); + metamorph.literal("d1", "b"); + metamorph.literal("d2", "c"); + metamorph.endRecord(); + metamorph.startRecord("2"); + metamorph.literal("d2", "c"); + metamorph.endRecord(); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord("1"); + ordered.verify(receiver).startEntity("entity"); + ordered.verify(receiver).literal("l1", "a"); + ordered.verify(receiver).literal("l1", "b"); + ordered.verify(receiver).literal("l2", "c"); + ordered.verify(receiver).endEntity(); + ordered.verify(receiver).endRecord(); + ordered.verify(receiver).startRecord("2"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + verifyNoMoreInteractions(receiver); + } + @Test public void shouldEmitEntityOnEachFlushEvent() { metamorph = InlineMorph.in(this) @@ -488,7 +523,7 @@ public void shouldEmitEntityContentsAgainIfResetIsFalse() { } @Test - public void shouldNotEmitEntityContentsAgainIfResetIsFalse() { + public void shouldNotEmitEntityContentsAgainIfResetIsTrue() { metamorph = InlineMorph.in(this) .with("") .with(" ") diff --git a/metamorph/src/test/java/org/metafacture/metamorph/collectors/EqualsFilterTest.java b/metamorph/src/test/java/org/metafacture/metamorph/collectors/EqualsFilterTest.java index ede76548d..2f6c69351 100644 --- a/metamorph/src/test/java/org/metafacture/metamorph/collectors/EqualsFilterTest.java +++ b/metamorph/src/test/java/org/metafacture/metamorph/collectors/EqualsFilterTest.java @@ -19,6 +19,7 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verifyNoMoreInteractions; import org.junit.Rule; import org.junit.Test; @@ -233,4 +234,85 @@ public void shouldFireIfLiteralsInEntitiesAreReceivedThatAreNotListedInStatement ordered.verifyNoMoreInteractions(); } + @Test + public void shouldFireOnFlush() { + metamorph = InlineMorph.in(this) + .with("") + .with(" ") + .with(" ") + .with(" ") + .with(" ") + .with("") + .createConnectedTo(receiver); + + metamorph.startRecord("1"); + metamorph.startEntity("field1"); + metamorph.literal("data1", "a"); + metamorph.endEntity(); + metamorph.endRecord(); + metamorph.startRecord("2"); + metamorph.startEntity("field1"); + metamorph.literal("data2", "a"); + metamorph.endEntity(); + metamorph.endRecord(); + metamorph.startRecord("3"); + metamorph.startEntity("field1"); + metamorph.literal("data1", "a"); + metamorph.literal("data2", "a"); + metamorph.endEntity(); + metamorph.endRecord(); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord("1"); + ordered.verify(receiver).endRecord(); + ordered.verify(receiver).startRecord("2"); + ordered.verify(receiver).literal("equalsFiltered", ""); + ordered.verify(receiver).endRecord(); + ordered.verify(receiver).startRecord("3"); + ordered.verify(receiver).literal("equalsFiltered", "a"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + verifyNoMoreInteractions(receiver); + } + + @Test + public void shouldNotFireOnFlushIfIncomplete() { + metamorph = InlineMorph.in(this) + .with("") + .with(" ") + .with(" ") + .with(" ") + .with(" ") + .with("") + .createConnectedTo(receiver); + + metamorph.startRecord("1"); + metamorph.startEntity("field1"); + metamorph.literal("data1", "a"); + metamorph.endEntity(); + metamorph.endRecord(); + metamorph.startRecord("2"); + metamorph.startEntity("field1"); + metamorph.literal("data2", "a"); + metamorph.endEntity(); + metamorph.endRecord(); + metamorph.startRecord("3"); + metamorph.startEntity("field1"); + metamorph.literal("data1", "a"); + metamorph.literal("data2", "a"); + metamorph.endEntity(); + metamorph.endRecord(); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord("1"); + ordered.verify(receiver).endRecord(); + ordered.verify(receiver).startRecord("2"); + ordered.verify(receiver).endRecord(); + ordered.verify(receiver).startRecord("3"); + ordered.verify(receiver).literal("equalsFiltered", "a"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + verifyNoMoreInteractions(receiver); + } + }