diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java new file mode 100644 index 000000000..e0fc9973b --- /dev/null +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java @@ -0,0 +1,270 @@ +package org.metafacture.biblio.marc21; + +import java.util.Collections; + +import org.metafacture.framework.FluxCommand; +import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.ObjectReceiver; +import org.metafacture.framework.StreamReceiver; +import org.metafacture.framework.annotations.Description; +import org.metafacture.framework.annotations.In; +import org.metafacture.framework.annotations.Out; +import org.metafacture.framework.helpers.DefaultStreamPipe; + +/** + * Encodes a stream into MARCXML. + * + * @author some Jan (Eberhardt) did almost all + * @author Pascal Christoph (dr0i) dug it up again + */ + +@Description("Encodes a stream into MARCXML.") +@In(StreamReceiver.class) +@Out(String.class) +@FluxCommand("encode-marc21") +public final class MarcXmlEncoder extends DefaultStreamPipe> { + private static final String ROOT_OPEN = ""; + private static final String ROOT_CLOSE = ""; + + private static final String RECORD_OPEN = ""; + private static final String RECORD_CLOSE = ""; + + private static final String CONTROLFIELD_OPEN_TEMPLATE = ""; + private static final String CONTROLFIELD_CLOSE = ""; + + private static final String DATAFIELD_OPEN_TEMPLATE = ""; + private static final String DATAFIELD_CLOSE = ""; + + private static final String SUBFIELD_OPEN_TEMPLATE = ""; + private static final String SUBFIELD_CLOSE = ""; + + private static final String NEW_LINE = "\n"; + private static final String INDENT = "\t"; + + private static final String XML_DECLARATION_TEMPLATE = ""; + + private final StringBuilder builder; + + private boolean atStreamStart; + + private boolean omitXmlDeclaration; + private String xmlVersion; + private String xmlEncoding; + + private String currentEntity; + private int indentationLevel; + private boolean formatted; + + public MarcXmlEncoder() { + this.builder = new StringBuilder(); + this.atStreamStart = true; + + this.omitXmlDeclaration = false; + this.xmlVersion = "1.0"; + this.xmlEncoding = "UTF-8"; + + this.currentEntity = ""; + + this.indentationLevel = 0; + this.formatted = true; + } + + public void omitXmlDeclaration(boolean omitXmlDeclaration) { + this.omitXmlDeclaration = omitXmlDeclaration; + } + + public void setXmlVersion(String xmlVersion) { + this.xmlVersion = xmlVersion; + } + + public void setXmlEncoding(String xmlEncoding) { + this.xmlEncoding = xmlEncoding; + } + + /** + * Formats the resulting xml, by indentation. + * + * @param formatted + * True, if formatting is activated. + */ + public void setFormatted(boolean formatted) { + this.formatted = formatted; + } + + @Override + public void startRecord(final String identifier) { + if (atStreamStart) { + if (!omitXmlDeclaration) { + writeHeader(); + prettyPrintNewLine(); + } + writeRaw(ROOT_OPEN); + prettyPrintNewLine(); + incrementIndentationLevel(); + } + atStreamStart = false; + + prettyPrintIndentation(); + writeRaw(RECORD_OPEN); + prettyPrintNewLine(); + + incrementIndentationLevel(); + } + + @Override + public void endRecord() { + decrementIndentationLevel(); + prettyPrintIndentation(); + writeRaw(RECORD_CLOSE); + prettyPrintNewLine(); + sendAndClearData(); + } + + @Override + public void startEntity(final String name) { + currentEntity = name; + if (!name.equals("leader")) { + if (name.length() != 5) { + String message = String.format("Entity too short." + "Got a string ('%s') of length %d." + + "Expected a length of 5 (field + indicators).", name, name.length()); + throw new MetafactureException(message); + } + + String tag = name.substring(0, 3); + String ind1 = name.substring(3, 4); + String ind2 = name.substring(4, 5); + prettyPrintIndentation(); + writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2)); + prettyPrintNewLine(); + incrementIndentationLevel(); + } + } + + @Override + public void endEntity() { + if (!currentEntity.equals("leader")) { + decrementIndentationLevel(); + prettyPrintIndentation(); + writeRaw(DATAFIELD_CLOSE); + prettyPrintNewLine(); + } + currentEntity = ""; + } + + @Override + public void literal(final String name, final String value) + { + if (currentEntity.equals("")) + { + prettyPrintIndentation(); + writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name)); + writeEscaped(value.trim()); + writeRaw(CONTROLFIELD_CLOSE); + prettyPrintNewLine(); + } + else if (!currentEntity.equals("leader")) + { + prettyPrintIndentation(); + writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name)); + writeEscaped(value.trim()); + writeRaw(SUBFIELD_CLOSE); + prettyPrintNewLine(); + } + else { + } + + @Override + protected void onResetStream() { + if (!atStreamStart) { + writeFooter(); + } + sendAndClearData(); + atStreamStart = true; + } + + @Override + protected void onCloseStream() { + writeFooter(); + sendAndClearData(); + } + + /** Increments the indentation level by one */ + private void incrementIndentationLevel() { + indentationLevel += 1; + } + + /** Decrements the indentation level by one */ + private void decrementIndentationLevel() { + indentationLevel -= 1; + } + + /** Adds a XML Header */ + private void writeHeader() { + writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding)); + } + + /** Closes the root tag */ + private void writeFooter() { + writeRaw(ROOT_CLOSE); + } + + /** Writes a unescaped sequence */ + private void writeRaw(final String str) { + builder.append(str); + } + + /** Writes a escaped sequence */ + private void writeEscaped(final String str) { + final int len = str.length(); + for (int i = 0; i < len; ++i) { + final char c = str.charAt(i); + final String entityName; + switch (c) { + case '&': + entityName = "amp"; + break; + case '<': + entityName = "lt"; + break; + case '>': + entityName = "gt"; + break; + case '\'': + entityName = "apos"; + break; + case '"': + entityName = "quot"; + break; + default: + entityName = null; + break; + } + + if (entityName == null) { + builder.append(c); + } else { + builder.append('&'); + builder.append(entityName); + builder.append(';'); + } + } + } + + private void prettyPrintIndentation() { + if (formatted) { + String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT)); + builder.append(prefix); + } + } + + private void prettyPrintNewLine() { + if (formatted) { + builder.append(NEW_LINE); + } + } + + private void sendAndClearData() { + getReceiver().process(builder.toString()); + builder.delete(0, builder.length()); + } +} \ No newline at end of file diff --git a/metafacture-biblio/src/main/resources/flux-commands.properties b/metafacture-biblio/src/main/resources/flux-commands.properties index 91d97c488..25ce5d5bf 100644 --- a/metafacture-biblio/src/main/resources/flux-commands.properties +++ b/metafacture-biblio/src/main/resources/flux-commands.properties @@ -16,6 +16,7 @@ decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler +encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder decode-pica org.metafacture.biblio.pica.PicaDecoder encode-pica org.metafacture.biblio.pica.PicaEncoder diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java new file mode 100644 index 000000000..faf5df5eb --- /dev/null +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java @@ -0,0 +1,114 @@ +package org.metafacture.biblio.marc21; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.metafacture.framework.helpers.DefaultObjectReceiver; + +public class MarcXmlEncoderTest { + + private StringBuilder resultCollector; + private MarcXmlEncoder encoder; + + @Before + public void setUp() throws Exception { + encoder = new MarcXmlEncoder(); + encoder.setFormatted(false); + encoder.setReceiver(new DefaultObjectReceiver() { + @Override + public void process(final String obj) { + resultCollector.append(obj); + } + }); + resultCollector = new StringBuilder(); + } + + @After + public void tearDown() throws Exception { + } + + private void addOneRecord(MarcXmlEncoder encoder) { + encoder.startRecord("92005291"); + encoder.literal("001", "92005291"); + encoder.startEntity("010 "); + encoder.literal("a", "92005291"); + encoder.endEntity(); + encoder.endRecord(); + } + + @Test + public void doNotOmitXmlDeclaration() throws Exception { + encoder.omitXmlDeclaration(false); + addOneRecord(encoder); + encoder.closeStream(); + + String actual = resultCollector.toString(); + assertTrue(actual.startsWith("")); + } + + @Test + public void omitXmlDeclaration() throws Exception { + encoder.omitXmlDeclaration(true); + addOneRecord(encoder); + encoder.closeStream(); + String actual = resultCollector.toString(); + assertTrue(actual.startsWith("")); + } + + @Test + public void setXmlVersion() throws Exception { + encoder.omitXmlDeclaration(false); + encoder.setXmlVersion("1.1"); + addOneRecord(encoder); + encoder.closeStream(); + + String actual = resultCollector.toString(); + assertTrue(actual.startsWith("")); + } + + @Test + public void setXmlEncoding() throws Exception { + encoder.omitXmlDeclaration(false); + encoder.setXmlEncoding("UTF-16"); + addOneRecord(encoder); + encoder.closeStream(); + + String actual = resultCollector.toString(); + assertTrue(actual.startsWith("")); + } + + @Test + public void createAnEmptyRecord() throws Exception { + encoder.startRecord("1"); + encoder.endRecord(); + encoder.closeStream(); + String expected = ""; + String actual = resultCollector.toString(); + assertEquals(expected, actual); + } + + @Test + public void createARecord() throws Exception { + addOneRecord(encoder); + encoder.closeStream(); + String expected = "9200529192005291"; + String actual = resultCollector.toString(); + assertEquals(expected, actual); + } + + @Test + public void createTwoRecordsInOneCollection() throws Exception { + addOneRecord(encoder); + addOneRecord(encoder); + encoder.closeStream(); + + String expected = "92005291920052919200529192005291"; + String actual = resultCollector.toString(); + + assertEquals(expected, actual); + } +}