Skip to content

Commit

Permalink
Add MarcXmlEncoder
Browse files Browse the repository at this point in the history
- add tests

This is a copy of the files of the closed but unmerged PR #297.

See #300.
  • Loading branch information
dr0i committed Jul 1, 2019
1 parent b2400c2 commit 78aaca3
Show file tree
Hide file tree
Showing 3 changed files with 385 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
package org.metafacture.biblio.marc21;

import java.util.Collections;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;

/**
* Encodes a stream into MARCXML.
*
* @author some Jan (Eberhardt) did almost all
* @author Pascal Christoph (dr0i) dug it up again
*/

@Description("Encodes a stream into MARCXML.")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("encode-marc21")
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
private static final String ROOT_CLOSE = "</marc:collection>";

private static final String RECORD_OPEN = "<marc:record>";
private static final String RECORD_CLOSE = "</marc:record>";

private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\"%s\">";
private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>";

private static final String DATAFIELD_OPEN_TEMPLATE = "<marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">";
private static final String DATAFIELD_CLOSE = "</marc:datafield>";

private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\"%s\">";
private static final String SUBFIELD_CLOSE = "</marc:subfield>";

private static final String NEW_LINE = "\n";
private static final String INDENT = "\t";

private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\"%s\" encoding=\"%s\"?>";

private final StringBuilder builder;

private boolean atStreamStart;

private boolean omitXmlDeclaration;
private String xmlVersion;
private String xmlEncoding;

private String currentEntity;
private int indentationLevel;
private boolean formatted;

public MarcXmlEncoder() {
this.builder = new StringBuilder();
this.atStreamStart = true;

this.omitXmlDeclaration = false;
this.xmlVersion = "1.0";
this.xmlEncoding = "UTF-8";

this.currentEntity = "";

this.indentationLevel = 0;
this.formatted = true;
}

public void omitXmlDeclaration(boolean omitXmlDeclaration) {
this.omitXmlDeclaration = omitXmlDeclaration;
}

public void setXmlVersion(String xmlVersion) {
this.xmlVersion = xmlVersion;
}

public void setXmlEncoding(String xmlEncoding) {
this.xmlEncoding = xmlEncoding;
}

/**
* Formats the resulting xml, by indentation.
*
* @param formatted
* True, if formatting is activated.
*/
public void setFormatted(boolean formatted) {
this.formatted = formatted;
}

@Override
public void startRecord(final String identifier) {
if (atStreamStart) {
if (!omitXmlDeclaration) {
writeHeader();
prettyPrintNewLine();
}
writeRaw(ROOT_OPEN);
prettyPrintNewLine();
incrementIndentationLevel();
}
atStreamStart = false;

prettyPrintIndentation();
writeRaw(RECORD_OPEN);
prettyPrintNewLine();

incrementIndentationLevel();
}

@Override
public void endRecord() {
decrementIndentationLevel();
prettyPrintIndentation();
writeRaw(RECORD_CLOSE);
prettyPrintNewLine();
sendAndClearData();
}

@Override
public void startEntity(final String name) {
currentEntity = name;
if (!name.equals("leader")) {
if (name.length() != 5) {
String message = String.format("Entity too short." + "Got a string ('%s') of length %d."
+ "Expected a length of 5 (field + indicators).", name, name.length());
throw new MetafactureException(message);
}

String tag = name.substring(0, 3);
String ind1 = name.substring(3, 4);
String ind2 = name.substring(4, 5);
prettyPrintIndentation();
writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2));
prettyPrintNewLine();
incrementIndentationLevel();
}
}

@Override
public void endEntity() {
if (!currentEntity.equals("leader")) {
decrementIndentationLevel();
prettyPrintIndentation();
writeRaw(DATAFIELD_CLOSE);
prettyPrintNewLine();
}
currentEntity = "";
}

@Override
public void literal(final String name, final String value)
{
if (currentEntity.equals(""))
{
prettyPrintIndentation();
writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name));
writeEscaped(value.trim());
writeRaw(CONTROLFIELD_CLOSE);
prettyPrintNewLine();
}
else if (!currentEntity.equals("leader"))
{
prettyPrintIndentation();
writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name));
writeEscaped(value.trim());
writeRaw(SUBFIELD_CLOSE);
prettyPrintNewLine();
}
else {
}

@Override
protected void onResetStream() {
if (!atStreamStart) {
writeFooter();
}
sendAndClearData();
atStreamStart = true;
}

@Override
protected void onCloseStream() {
writeFooter();
sendAndClearData();
}

/** Increments the indentation level by one */
private void incrementIndentationLevel() {
indentationLevel += 1;
}

/** Decrements the indentation level by one */
private void decrementIndentationLevel() {
indentationLevel -= 1;
}

/** Adds a XML Header */
private void writeHeader() {
writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding));
}

/** Closes the root tag */
private void writeFooter() {
writeRaw(ROOT_CLOSE);
}

/** Writes a unescaped sequence */
private void writeRaw(final String str) {
builder.append(str);
}

/** Writes a escaped sequence */
private void writeEscaped(final String str) {
final int len = str.length();
for (int i = 0; i < len; ++i) {
final char c = str.charAt(i);
final String entityName;
switch (c) {
case '&':
entityName = "amp";
break;
case '<':
entityName = "lt";
break;
case '>':
entityName = "gt";
break;
case '\'':
entityName = "apos";
break;
case '"':
entityName = "quot";
break;
default:
entityName = null;
break;
}

if (entityName == null) {
builder.append(c);
} else {
builder.append('&');
builder.append(entityName);
builder.append(';');
}
}
}

private void prettyPrintIndentation() {
if (formatted) {
String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
builder.append(prefix);
}
}

private void prettyPrintNewLine() {
if (formatted) {
builder.append(NEW_LINE);
}
}

private void sendAndClearData() {
getReceiver().process(builder.toString());
builder.delete(0, builder.length());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder
encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder
handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler
encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder

decode-pica org.metafacture.biblio.pica.PicaDecoder
encode-pica org.metafacture.biblio.pica.PicaEncoder
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package org.metafacture.biblio.marc21;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.metafacture.framework.helpers.DefaultObjectReceiver;

/**
 * Tests for {@link MarcXmlEncoder}. All tests run with formatting switched
 * off, so the expected output contains no line breaks or indentation.
 */
public class MarcXmlEncoderTest {

    private static final String XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    private static final String COLLECTION_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
    private static final String COLLECTION_CLOSE = "</marc:collection>";
    private static final String EMPTY_RECORD_XML = "<marc:record></marc:record>";

    /** Expected encoding of the record emitted by {@link #addOneRecord(MarcXmlEncoder)}. */
    private static final String RECORD_XML = "<marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record>";

    /** A blank indicator position of a data field. */
    private static final String BLANK_INDICATOR = " ";

    private StringBuilder resultCollector;
    private MarcXmlEncoder encoder;

    @Before
    public void setUp() throws Exception {
        resultCollector = new StringBuilder();
        encoder = new MarcXmlEncoder();
        encoder.setFormatted(false);
        // Concatenate everything the encoder sends, in order of arrival.
        encoder.setReceiver(new DefaultObjectReceiver<String>() {
            @Override
            public void process(final String obj) {
                resultCollector.append(obj);
            }
        });
    }

    @After
    public void tearDown() throws Exception {
        // Nothing to clean up; every test gets fresh objects from setUp().
    }

    /** Sends one record with a control field 001 and a data field 010 with subfield a. */
    private void addOneRecord(final MarcXmlEncoder encoder) {
        encoder.startRecord("92005291");
        encoder.literal("001", "92005291");
        // The entity name must be exactly five characters long: the tag
        // followed by indicator 1 and indicator 2 (both blank here).
        encoder.startEntity("010" + BLANK_INDICATOR + BLANK_INDICATOR);
        encoder.literal("a", "92005291");
        encoder.endEntity();
        encoder.endRecord();
    }

    @Test
    public void doNotOmitXmlDeclaration() throws Exception {
        encoder.omitXmlDeclaration(false);
        addOneRecord(encoder);
        encoder.closeStream();

        final String actual = resultCollector.toString();
        assertTrue(actual.startsWith(XML_DECLARATION));
    }

    @Test
    public void omitXmlDeclaration() throws Exception {
        encoder.omitXmlDeclaration(true);
        addOneRecord(encoder);
        encoder.closeStream();

        final String actual = resultCollector.toString();
        assertTrue(actual.startsWith("<marc:collection"));
        assertTrue(actual.endsWith(COLLECTION_CLOSE));
    }

    @Test
    public void setXmlVersion() throws Exception {
        encoder.omitXmlDeclaration(false);
        encoder.setXmlVersion("1.1");
        addOneRecord(encoder);
        encoder.closeStream();

        final String actual = resultCollector.toString();
        assertTrue(actual.startsWith("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"));
    }

    @Test
    public void setXmlEncoding() throws Exception {
        encoder.omitXmlDeclaration(false);
        encoder.setXmlEncoding("UTF-16");
        addOneRecord(encoder);
        encoder.closeStream();

        final String actual = resultCollector.toString();
        assertTrue(actual.startsWith("<?xml version=\"1.0\" encoding=\"UTF-16\"?>"));
    }

    @Test
    public void createAnEmptyRecord() throws Exception {
        encoder.startRecord("1");
        encoder.endRecord();
        encoder.closeStream();

        final String expected = XML_DECLARATION + COLLECTION_OPEN + EMPTY_RECORD_XML + COLLECTION_CLOSE;
        final String actual = resultCollector.toString();
        assertEquals(expected, actual);
    }

    @Test
    public void createARecord() throws Exception {
        addOneRecord(encoder);
        encoder.closeStream();

        final String expected = XML_DECLARATION + COLLECTION_OPEN + RECORD_XML + COLLECTION_CLOSE;
        final String actual = resultCollector.toString();
        assertEquals(expected, actual);
    }

    @Test
    public void createTwoRecordsInOneCollection() throws Exception {
        addOneRecord(encoder);
        addOneRecord(encoder);
        encoder.closeStream();

        final String expected = XML_DECLARATION + COLLECTION_OPEN + RECORD_XML + RECORD_XML + COLLECTION_CLOSE;
        final String actual = resultCollector.toString();
        assertEquals(expected, actual);
    }
}

0 comments on commit 78aaca3

Please sign in to comment.