diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java
index 290f66e53..38ba469be 100644
--- a/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java
@@ -156,6 +156,14 @@ public String getRecordId() {
return buffer.stringAt(dataStart, dataLength, charset);
}
+ /**
+ * Returns the record label, i.e. the record leader, as a single string.
+ * <p>
+ * The value is the string representation of the internal {@code label}
+ * object; no individual leader positions are extracted here.
+ *
+ * @return a string which is the record leader.
+ */
+ public String getLabel() {
+ return label.toString();
+ }
/**
* Iterates through all fields in the record and calls the appropriate method
* on the supplied {@link FieldHandler} instance.
diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java
index 437739b65..e52b54e4e 100644
--- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java
@@ -140,6 +140,7 @@ public final class Marc21Decoder
private final FieldHandler fieldHandler = new Marc21Handler();
private boolean ignoreMissingId;
+ private boolean emitLeaderAsWhole;
/**
* Controls whether the decoder aborts processing if a record has no
@@ -164,6 +165,24 @@ public boolean getIgnoreMissingId() {
return ignoreMissingId;
}
+ /**
+ * Controls whether the Record Leader should be emitted as a whole instead of
+ * extracting the bibliographic information in the record leader.
+ * <p>
+ * When enabled, the leader entity contains a single literal that is named
+ * like the entity itself and carries the complete leader string. Otherwise
+ * the individual leader elements are emitted as separate literals.
+ *
+ * @see <a href="https://www.loc.gov/marc/bibliographic/bdleader.html">MARC 21
+ * Standard: Record Leader</a>
+ *
+ * @param emitLeaderAsWhole
+ * true if the leader should be emitted as a whole.
+ */
+ public void setEmitLeaderAsWhole(final boolean emitLeaderAsWhole) {
+ this.emitLeaderAsWhole = emitLeaderAsWhole;
+ }
+
+ /**
+ * Tells whether the Record Leader is emitted as a whole.
+ *
+ * @return true if the leader is emitted as a single literal, false if its
+ * elements are emitted as individual literals.
+ */
+ public boolean getEmitLeaderAsWhole() {
+ return emitLeaderAsWhole;
+ }
+
@Override
public void process(final String obj) {
if (obj.isEmpty()) {
@@ -207,9 +226,12 @@ private String tryGetRecordId(final Record record) {
}
private void emitLeader(final Record record) {
+ getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
+ if (emitLeaderAsWhole){
+ getReceiver().literal(Marc21EventNames.LEADER_ENTITY, record.getLabel());
+ }else {
final char[] implCodes = record.getImplCodes();
final char[] systemChars = record.getSystemChars();
- getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
getReceiver().literal(Marc21EventNames.RECORD_STATUS_LITERAL, String.valueOf(
record.getRecordStatus()));
getReceiver().literal(Marc21EventNames.RECORD_TYPE_LITERAL, String.valueOf(
@@ -226,6 +248,7 @@ private void emitLeader(final Record record) {
systemChars[Marc21Constants.CATALOGING_FORM_INDEX]));
getReceiver().literal(Marc21EventNames.MULTIPART_LEVEL_LITERAL, String.valueOf(
systemChars[Marc21Constants.MULTIPART_LEVEL_INDEX]));
+ }
getReceiver().endEntity();
}
diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
new file mode 100644
index 000000000..330584dd4
--- /dev/null
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
@@ -0,0 +1,275 @@
+package org.metafacture.biblio.marc21;
+
+import java.util.Collections;
+
+import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.ObjectReceiver;
+import org.metafacture.framework.StreamReceiver;
+import org.metafacture.framework.annotations.Description;
+import org.metafacture.framework.annotations.In;
+import org.metafacture.framework.annotations.Out;
+import org.metafacture.framework.helpers.DefaultStreamPipe;
+
+/**
+ * Encodes a stream into MARCXML.
+ * <p>
+ * The events of a record are mapped as follows:
+ * <ul>
+ * <li>a literal outside of any entity becomes a {@code controlfield} whose tag
+ * is the literal name,</li>
+ * <li>an entity becomes a {@code datafield}; its name must consist of exactly
+ * five characters: the three-character tag followed by the two indicators,</li>
+ * <li>a literal inside such an entity becomes a {@code subfield} whose code is
+ * the literal name,</li>
+ * <li>inside an entity named {@code leader} only a literal that is also named
+ * {@code leader} is written (as the {@code leader} element); every other
+ * literal of that entity is ignored.</li>
+ * </ul>
+ * The serialised XML is passed downstream once per record. The closing root
+ * tag follows when the stream is reset or closed.
+ *
+ * @author some Jan (Eberhardt) did almost all
+ * @author Pascal Christoph (dr0i) dug it up again
+ */
+@Description("Encodes a stream into MARCXML.")
+@In(StreamReceiver.class)
+@Out(String.class)
+@FluxCommand("encode-marcxml")
+public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
+
+    // NOTE(review): the markup literals below were empty strings in the reviewed
+    // copy of this patch (the tags were evidently stripped in transit). They are
+    // restored following the MARCXML "slim" schema; confirm them against the
+    // expected strings in MarcXmlEncoderTest.
+    private static final String ROOT_OPEN = "<marc:collection"
+            + " xmlns:marc=\"http://www.loc.gov/MARC21/slim\""
+            + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
+            + " xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim"
+            + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
+    private static final String ROOT_CLOSE = "</marc:collection>";
+
+    private static final String RECORD_OPEN = "<marc:record>";
+    private static final String RECORD_CLOSE = "</marc:record>";
+
+    private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\"%s\">";
+    private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>";
+
+    private static final String DATAFIELD_OPEN_TEMPLATE =
+            "<marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">";
+    private static final String DATAFIELD_CLOSE = "</marc:datafield>";
+
+    private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\"%s\">";
+    private static final String SUBFIELD_CLOSE = "</marc:subfield>";
+
+    private static final String LEADER_OPEN_TEMPLATE = "<marc:leader>";
+    private static final String LEADER_CLOSE_TEMPLATE = "</marc:leader>";
+
+    private static final String NEW_LINE = "\n";
+    private static final String INDENT = "\t";
+
+    private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\"%s\" encoding=\"%s\"?>";
+
+    /** Length of the field tag at the start of a data field entity name. */
+    private static final int TAG_LENGTH = 3;
+    /** Required length of a data field entity name: tag plus two indicators. */
+    private static final int DATAFIELD_NAME_LENGTH = 5;
+
+    /** Collects the XML of the current record until it is sent downstream. */
+    private final StringBuilder builder;
+
+    /** True until the first record after construction or a reset has started. */
+    private boolean atStreamStart;
+
+    private boolean omitXmlDeclaration;
+    private String xmlVersion;
+    private String xmlEncoding;
+
+    /** Name of the entity currently open; empty when outside of any entity. */
+    private String currentEntity;
+    private int indentationLevel;
+    private boolean formatted;
+
+    /**
+     * Creates an encoder that writes an XML declaration (version 1.0, encoding
+     * UTF-8) and produces indented output.
+     */
+    public MarcXmlEncoder() {
+        this.builder = new StringBuilder();
+        this.atStreamStart = true;
+
+        this.omitXmlDeclaration = false;
+        this.xmlVersion = "1.0";
+        this.xmlEncoding = "UTF-8";
+
+        this.currentEntity = "";
+
+        this.indentationLevel = 0;
+        this.formatted = true;
+    }
+
+    /**
+     * Controls whether the XML declaration is left out at the start of the
+     * output.
+     *
+     * @param omitXmlDeclaration
+     *            true if no XML declaration should be written.
+     */
+    public void omitXmlDeclaration(final boolean omitXmlDeclaration) {
+        this.omitXmlDeclaration = omitXmlDeclaration;
+    }
+
+    /**
+     * Sets the version that is written into the XML declaration.
+     *
+     * @param xmlVersion
+     *            the XML version; inserted as is.
+     */
+    public void setXmlVersion(final String xmlVersion) {
+        this.xmlVersion = xmlVersion;
+    }
+
+    /**
+     * Sets the encoding name that is written into the XML declaration.
+     *
+     * @param xmlEncoding
+     *            the encoding name; inserted as is.
+     */
+    public void setXmlEncoding(final String xmlEncoding) {
+        this.xmlEncoding = xmlEncoding;
+    }
+
+    /**
+     * Formats the resulting xml, by indentation.
+     *
+     * @param formatted
+     *            True, if formatting is activated.
+     */
+    public void setFormatted(final boolean formatted) {
+        this.formatted = formatted;
+    }
+
+    @Override
+    public void startRecord(final String identifier) {
+        if (atStreamStart) {
+            if (!omitXmlDeclaration) {
+                writeHeader();
+                prettyPrintNewLine();
+            }
+            writeRaw(ROOT_OPEN);
+            prettyPrintNewLine();
+            incrementIndentationLevel();
+        }
+        atStreamStart = false;
+
+        prettyPrintIndentation();
+        writeRaw(RECORD_OPEN);
+        prettyPrintNewLine();
+
+        incrementIndentationLevel();
+    }
+
+    @Override
+    public void endRecord() {
+        decrementIndentationLevel();
+        prettyPrintIndentation();
+        writeRaw(RECORD_CLOSE);
+        prettyPrintNewLine();
+        sendAndClearData();
+    }
+
+    /**
+     * Opens a data field, unless the entity is the leader entity, which produces
+     * no element of its own.
+     *
+     * @throws MetafactureException
+     *             if the name of a data field entity is not exactly five
+     *             characters long.
+     */
+    @Override
+    public void startEntity(final String name) {
+        currentEntity = name;
+        if (Marc21EventNames.LEADER_ENTITY.equals(name)) {
+            return;
+        }
+        if (name.length() != DATAFIELD_NAME_LENGTH) {
+            throw new MetafactureException(String.format(
+                    "Entity name has the wrong length. Got a string ('%s') of length %d. "
+                    + "Expected a length of %d (field + indicators).",
+                    name, name.length(), DATAFIELD_NAME_LENGTH));
+        }
+
+        final String tag = name.substring(0, TAG_LENGTH);
+        final String ind1 = name.substring(TAG_LENGTH, TAG_LENGTH + 1);
+        final String ind2 = name.substring(TAG_LENGTH + 1, DATAFIELD_NAME_LENGTH);
+        prettyPrintIndentation();
+        // Attribute values are escaped so that unusual tags or indicators cannot
+        // break the well-formedness of the output.
+        writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, escape(tag), escape(ind1), escape(ind2)));
+        prettyPrintNewLine();
+        incrementIndentationLevel();
+    }
+
+    @Override
+    public void endEntity() {
+        if (!Marc21EventNames.LEADER_ENTITY.equals(currentEntity)) {
+            decrementIndentationLevel();
+            prettyPrintIndentation();
+            writeRaw(DATAFIELD_CLOSE);
+            prettyPrintNewLine();
+        }
+        currentEntity = "";
+    }
+
+    @Override
+    public void literal(final String name, final String value) {
+        if (currentEntity.isEmpty()) {
+            // Outside of any entity: control field.
+            prettyPrintIndentation();
+            writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, escape(name)));
+            writeEscaped(value.trim());
+            writeRaw(CONTROLFIELD_CLOSE);
+            prettyPrintNewLine();
+        } else if (!Marc21EventNames.LEADER_ENTITY.equals(currentEntity)) {
+            // Inside a data field: subfield.
+            prettyPrintIndentation();
+            writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, escape(name)));
+            writeEscaped(value.trim());
+            writeRaw(SUBFIELD_CLOSE);
+            prettyPrintNewLine();
+        } else if (Marc21EventNames.LEADER_ENTITY.equals(name)) {
+            // Inside the leader entity only the whole-leader literal is written;
+            // its value is not trimmed but is escaped like any other content.
+            prettyPrintIndentation();
+            writeRaw(LEADER_OPEN_TEMPLATE);
+            writeEscaped(value);
+            writeRaw(LEADER_CLOSE_TEMPLATE);
+            prettyPrintNewLine();
+        }
+    }
+
+    @Override
+    protected void onResetStream() {
+        if (!atStreamStart) {
+            writeFooter();
+        }
+        sendAndClearData();
+        // Start the next document from a clean state; otherwise the indentation
+        // of the root element would leak into the following output.
+        atStreamStart = true;
+        indentationLevel = 0;
+        currentEntity = "";
+    }
+
+    @Override
+    protected void onCloseStream() {
+        // The closing root tag is written unconditionally, i.e. also when no
+        // record has been received.
+        writeFooter();
+        sendAndClearData();
+    }
+
+    /** Increments the indentation level by one */
+    private void incrementIndentationLevel() {
+        indentationLevel += 1;
+    }
+
+    /** Decrements the indentation level by one */
+    private void decrementIndentationLevel() {
+        indentationLevel -= 1;
+    }
+
+    /** Adds a XML Header */
+    private void writeHeader() {
+        writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding));
+    }
+
+    /** Closes the root tag */
+    private void writeFooter() {
+        writeRaw(ROOT_CLOSE);
+    }
+
+    /** Writes a unescaped sequence */
+    private void writeRaw(final String str) {
+        builder.append(str);
+    }
+
+    /** Writes a escaped sequence */
+    private void writeEscaped(final String str) {
+        builder.append(escape(str));
+    }
+
+    /**
+     * Replaces the five characters with predefined XML entities
+     * ({@code & < > ' "}) by their entity references.
+     */
+    private static String escape(final String str) {
+        final int len = str.length();
+        final StringBuilder escaped = new StringBuilder(len);
+        for (int i = 0; i < len; ++i) {
+            final char c = str.charAt(i);
+            switch (c) {
+            case '&':
+                escaped.append("&amp;");
+                break;
+            case '<':
+                escaped.append("&lt;");
+                break;
+            case '>':
+                escaped.append("&gt;");
+                break;
+            case '\'':
+                escaped.append("&apos;");
+                break;
+            case '"':
+                escaped.append("&quot;");
+                break;
+            default:
+                escaped.append(c);
+                break;
+            }
+        }
+        return escaped.toString();
+    }
+
+    /** Writes the current indentation if formatting is activated. */
+    private void prettyPrintIndentation() {
+        if (formatted) {
+            final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
+            builder.append(prefix);
+        }
+    }
+
+    /** Writes a line break if formatting is activated. */
+    private void prettyPrintNewLine() {
+        if (formatted) {
+            builder.append(NEW_LINE);
+        }
+    }
+
+    /** Passes the collected XML downstream and empties the buffer. */
+    private void sendAndClearData() {
+        getReceiver().process(builder.toString());
+        builder.setLength(0);
+    }
+}
\ No newline at end of file
diff --git a/metafacture-biblio/src/main/resources/flux-commands.properties b/metafacture-biblio/src/main/resources/flux-commands.properties
index 91d97c488..25ce5d5bf 100644
--- a/metafacture-biblio/src/main/resources/flux-commands.properties
+++ b/metafacture-biblio/src/main/resources/flux-commands.properties
@@ -16,6 +16,7 @@
decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder
encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder
handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler
+encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder
decode-pica org.metafacture.biblio.pica.PicaDecoder
encode-pica org.metafacture.biblio.pica.PicaEncoder
diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java
new file mode 100644
index 000000000..5c37374a6
--- /dev/null
+++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2019 Pascal Christoph (hbz)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.metafacture.biblio.marc21;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.metafacture.framework.helpers.DefaultObjectReceiver;
+
+/**
+ * Tests for class {@link MarcXmlEncoder}.
+ *
+ * @author some Jan (Eberhardt) did almost all
+ * @author Pascal Christoph (dr0i) dug it up again
+ *
+ */
+
+public class MarcXmlEncoderTest {
+ // Accumulates every string the encoder passes downstream; re-created in setUp().
+ private static StringBuilder resultCollector ;
+ // Encoder under test; re-created in setUp().
+ private static MarcXmlEncoder encoder ;
+ // NOTE(review): the expected-XML constants below are empty or tag-less in
+ // this copy of the patch; presumably the markup was stripped in transit.
+ // Verify them against the original patch.
+ private static final String XML_DECLARATION = "";
+ private static final String XML_1_DECLARATION = "";
+ private static final String XML_16_DECLARATION = "";
+ private static final String XML_ROOT_OPEN = "";
+ private static final String XML_RECORD = "92005291"
+ + "92005291"
+ + "";
+ private static final String XML_MARC_COLLECTION_END_TAG = "";
+ private static final String RECORD_ID = "92005291";
+
+ // Creates an encoder with formatting switched off whose output is appended
+ // to a new, empty resultCollector.
+ @Before
+ public void setUp() throws Exception {
+ encoder = new MarcXmlEncoder();
+ encoder.setFormatted(false);
+ encoder.setReceiver(new DefaultObjectReceiver() {
+ @Override
+ public void process(final String obj) {
+ resultCollector.append(obj);
+ }
+ });
+ resultCollector= new StringBuilder();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ }
+
+ // Feeds one record: a literal "001" outside of any entity, followed by one
+ // entity with a single literal "a". Both literals carry RECORD_ID.
+ // NOTE(review): MarcXmlEncoder requires entity names of exactly 5 characters
+ // (tag + two indicators); the name below shows only 4, presumably because
+ // whitespace was collapsed in this copy. Verify.
+ private void addOneRecord(MarcXmlEncoder encoder) {
+ encoder.startRecord(RECORD_ID);
+ encoder.literal("001", RECORD_ID);
+ encoder.startEntity("010 ");
+ encoder.literal("a", RECORD_ID);
+ encoder.endEntity();
+ encoder.endRecord();
+ }
+
+ // With the declaration not omitted, the output must start with it.
+ @Test
+ public void doNotOmitXmlDeclaration() throws Exception {
+ encoder.omitXmlDeclaration(false);
+ addOneRecord(encoder);
+ encoder.closeStream();
+
+ String actual = resultCollector.toString();
+ assertTrue(actual.startsWith(XML_DECLARATION));
+ }
+
+ @Test
+ public void omitXmlDeclaration() throws Exception {
+ encoder.omitXmlDeclaration(true);
+ addOneRecord(encoder);
+ encoder.closeStream();
+ String actual = resultCollector.toString();
+ // NOTE(review): the following two lines do not form a valid statement; the
+ // end of this test and the beginning of at least one further test appear
+ // to be missing from this copy. Restore them from the original patch.
+ assertTrue(actual.startsWith(""
+ + XML_MARC_COLLECTION_END_TAG;
+ String actual = resultCollector.toString();
+ assertEquals(expected, actual);
+ }
+
+ // One record yields declaration, root element and exactly one record element.
+ @Test
+ public void createARecord() throws Exception {
+ addOneRecord(encoder);
+ encoder.closeStream();
+ String expected = XML_DECLARATION + XML_ROOT_OPEN + XML_RECORD + "";
+ String actual = resultCollector.toString();
+ assertEquals(expected, actual);
+ }
+
+ // Two records must share a single root element.
+ @Test
+ public void createTwoRecordsInOneCollection() throws Exception {
+ addOneRecord(encoder);
+ addOneRecord(encoder);
+ encoder.closeStream();
+ String expected = XML_DECLARATION + XML_ROOT_OPEN + XML_RECORD + XML_RECORD + "";
+ String actual = resultCollector.toString();
+ assertEquals(expected, actual);
+ }
+
+ // A literal "leader" inside an entity "leader" must appear in the output
+ // with its value.
+ @Test
+ public void createAnRecordWithLeader() throws Exception {
+ encoder.startRecord("1");
+ encoder.startEntity("leader");
+ encoder.literal("leader", "dummy");
+ encoder.endEntity();
+ encoder.endRecord();
+ encoder.closeStream();
+ String expected = XML_DECLARATION + XML_ROOT_OPEN + "dummy"
+ + XML_MARC_COLLECTION_END_TAG;
+ String actual = resultCollector.toString();
+ assertEquals(expected, actual);
+ }
+}
diff --git a/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java b/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java
index 588353d61..9fe465fca 100644
--- a/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java
+++ b/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java
@@ -1,5 +1,4 @@
-/*
- * Copyright 2019 hbz
+/* Copyright 2019 Pascal Christoph (hbz)
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
@@ -13,46 +12,81 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.metafacture.strings;
import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.ObjectPipe;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
-import org.metafacture.framework.helpers.DefaultObjectPipe;
/**
* Collects strings and emits them as records when a line matches the pattern.
- * Appends to every incoming line a line feed so that the original structure is
- * preserved.
+ * Appends to every incoming line a line feed so that the original structure
+ * is preserved.
*
* @author Pascal Christoph (dr0i).
*
*/
-@Description("Collects strings and emits them as records when a line matches the pattern.")
-@In(String.class)
-@Out(String.class)
-@FluxCommand("lines-to-records")
+@Description ( "Collects strings and emits them as records when a"
+ +" line matches the pattern or the stream is closed." )
+@In ( String.class )
+@Out ( String.class )
+@FluxCommand ( "lines-to-records" )
public final class LineRecorder
- extends DefaultObjectPipe> {
+ implements ObjectPipe> {
+
+ // Initial capacity of each buffer that collects the lines of one record.
+ private final static int SB_CAPACITY=4096*7;
+ // empty line is the default
+ private String recordMarkerRegexp="^\\s*$";
+ // Lines collected since the last emitted record.
+ StringBuilder record=new StringBuilder(
+ SB_CAPACITY);
+ // Downstream module; stays null until setReceiver is called.
+ ObjectReceiver receiver;
+
+ // Sets the regular expression a whole line must match to end a record.
+ public void setRecordMarkerRegexp ( final String regexp ) {
+ recordMarkerRegexp=regexp;
+ }
+
+ // A line matching the record marker sends the collected lines downstream
+ // as one record and starts a new buffer; the marker line itself is not
+ // added. Any other line is appended followed by "\n".
+ @Override
+ public void process ( final String line ) {
+ if(line.matches(
+ recordMarkerRegexp)){
+ getReceiver().process(
+ record.toString());
+ record=new StringBuilder(
+ SB_CAPACITY);
+ }else
+ record.append(
+ line+"\n");
+ }
- private final int SB_CAPACITY = 4096 * 7;
- private String recordMarkerRegexp = "^\\s*$"; // empty line is default
- StringBuilder record = new StringBuilder(SB_CAPACITY);
+ // Discards the lines collected so far; nothing is sent downstream.
+ @Override
+ public void resetStream ( ) {
+ record=new StringBuilder(
+ SB_CAPACITY);
+ }
- public void setRecordMarkerRegexp(final String regexp) {
- this.recordMarkerRegexp = regexp;
+ // Sends the lines collected so far downstream as a last record, also when
+ // nothing has been collected. Only process is called on the receiver here.
+ @Override
+ public void closeStream ( ) {
+ getReceiver().process(
+ record.toString());
}
@Override
- public void process(final String line) {
- assert !isClosed();
- if (line.matches(recordMarkerRegexp)) {
- getReceiver().process(record.toString());
- record = new StringBuilder(SB_CAPACITY);
- } else
- record.append(line + "\n");
+ // Stores the downstream module and returns it.
+ public > R setReceiver ( R receiver ) {
+ this.receiver=receiver;
+ return receiver;
+ }
+
+ /**
+ * Returns a reference to the downstream module.
+ *
+ * @return reference to the downstream module
+ */
+ protected final ObjectReceiver getReceiver ( ) {
+ return receiver;
}
}
diff --git a/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java b/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java
index 7573b58de..eefc21208 100644
--- a/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java
+++ b/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2019 Pascal Christoph
+ * Copyright 2019 Pascal Christoph (hbz)
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
@@ -99,4 +99,18 @@ public void shouldEmitRecordWithNonDefaultRecordMarker() {
ordered.verifyNoMoreInteractions();
}
+    /**
+     * Lines that were collected without a trailing record marker must be
+     * emitted as one record when the stream is closed.
+     */
+    @Test
+    public void shouldEmitLastRecordWithoutRecordMarkerWhenClosingStream() {
+        final String expectedRecord =
+                RECORD3_PART1 + LINE_SEPARATOR + RECORD3_PART2 + LINE_SEPARATOR;
+
+        lineRecorder.process(RECORD3_PART1);
+        lineRecorder.process(RECORD3_PART2);
+        lineRecorder.closeStream();
+
+        final InOrder callOrder = inOrder(receiver);
+        callOrder.verify(receiver).process(expectedRecord);
+        callOrder.verifyNoMoreInteractions();
+    }
+
}