-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
385 additions
and
0 deletions.
There are no files selected for viewing
270 changes: 270 additions & 0 deletions
270
metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,270 @@ | ||
package org.metafacture.biblio.marc21; | ||
|
||
import java.util.Collections; | ||
|
||
import org.metafacture.framework.FluxCommand; | ||
import org.metafacture.framework.MetafactureException; | ||
import org.metafacture.framework.ObjectReceiver; | ||
import org.metafacture.framework.StreamReceiver; | ||
import org.metafacture.framework.annotations.Description; | ||
import org.metafacture.framework.annotations.In; | ||
import org.metafacture.framework.annotations.Out; | ||
import org.metafacture.framework.helpers.DefaultStreamPipe; | ||
|
||
/** | ||
* Encodes a stream into MARCXML. | ||
* | ||
* @author some Jan (Eberhardt) did almost all | ||
* @author Pascal Christoph (dr0i) dug it up again | ||
*/ | ||
|
||
@Description("Encodes a stream into MARCXML.") | ||
@In(StreamReceiver.class) | ||
@Out(String.class) | ||
@FluxCommand("encode-marc21") | ||
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> { | ||
private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">"; | ||
private static final String ROOT_CLOSE = "</marc:collection>"; | ||
|
||
private static final String RECORD_OPEN = "<marc:record>"; | ||
private static final String RECORD_CLOSE = "</marc:record>"; | ||
|
||
private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\"%s\">"; | ||
private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>"; | ||
|
||
private static final String DATAFIELD_OPEN_TEMPLATE = "<marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">"; | ||
private static final String DATAFIELD_CLOSE = "</marc:datafield>"; | ||
|
||
private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\"%s\">"; | ||
private static final String SUBFIELD_CLOSE = "</marc:subfield>"; | ||
|
||
private static final String NEW_LINE = "\n"; | ||
private static final String INDENT = "\t"; | ||
|
||
private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\"%s\" encoding=\"%s\"?>"; | ||
|
||
private final StringBuilder builder; | ||
|
||
private boolean atStreamStart; | ||
|
||
private boolean omitXmlDeclaration; | ||
private String xmlVersion; | ||
private String xmlEncoding; | ||
|
||
private String currentEntity; | ||
private int indentationLevel; | ||
private boolean formatted; | ||
|
||
public MarcXmlEncoder() { | ||
this.builder = new StringBuilder(); | ||
this.atStreamStart = true; | ||
|
||
this.omitXmlDeclaration = false; | ||
this.xmlVersion = "1.0"; | ||
this.xmlEncoding = "UTF-8"; | ||
|
||
this.currentEntity = ""; | ||
|
||
this.indentationLevel = 0; | ||
this.formatted = true; | ||
} | ||
|
||
public void omitXmlDeclaration(boolean omitXmlDeclaration) { | ||
this.omitXmlDeclaration = omitXmlDeclaration; | ||
} | ||
|
||
public void setXmlVersion(String xmlVersion) { | ||
this.xmlVersion = xmlVersion; | ||
} | ||
|
||
public void setXmlEncoding(String xmlEncoding) { | ||
this.xmlEncoding = xmlEncoding; | ||
} | ||
|
||
/** | ||
* Formats the resulting xml, by indentation. | ||
* | ||
* @param formatted | ||
* True, if formatting is activated. | ||
*/ | ||
public void setFormatted(boolean formatted) { | ||
this.formatted = formatted; | ||
} | ||
|
||
@Override | ||
public void startRecord(final String identifier) { | ||
if (atStreamStart) { | ||
if (!omitXmlDeclaration) { | ||
writeHeader(); | ||
prettyPrintNewLine(); | ||
} | ||
writeRaw(ROOT_OPEN); | ||
prettyPrintNewLine(); | ||
incrementIndentationLevel(); | ||
} | ||
atStreamStart = false; | ||
|
||
prettyPrintIndentation(); | ||
writeRaw(RECORD_OPEN); | ||
prettyPrintNewLine(); | ||
|
||
incrementIndentationLevel(); | ||
} | ||
|
||
@Override | ||
public void endRecord() { | ||
decrementIndentationLevel(); | ||
prettyPrintIndentation(); | ||
writeRaw(RECORD_CLOSE); | ||
prettyPrintNewLine(); | ||
sendAndClearData(); | ||
} | ||
|
||
@Override | ||
public void startEntity(final String name) { | ||
currentEntity = name; | ||
if (!name.equals("leader")) { | ||
if (name.length() != 5) { | ||
String message = String.format("Entity too short." + "Got a string ('%s') of length %d." | ||
+ "Expected a length of 5 (field + indicators).", name, name.length()); | ||
throw new MetafactureException(message); | ||
} | ||
|
||
String tag = name.substring(0, 3); | ||
String ind1 = name.substring(3, 4); | ||
String ind2 = name.substring(4, 5); | ||
prettyPrintIndentation(); | ||
writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2)); | ||
prettyPrintNewLine(); | ||
incrementIndentationLevel(); | ||
} | ||
} | ||
|
||
@Override | ||
public void endEntity() { | ||
if (!currentEntity.equals("leader")) { | ||
decrementIndentationLevel(); | ||
prettyPrintIndentation(); | ||
writeRaw(DATAFIELD_CLOSE); | ||
prettyPrintNewLine(); | ||
} | ||
currentEntity = ""; | ||
} | ||
|
||
@Override | ||
public void literal(final String name, final String value) | ||
{ | ||
if (currentEntity.equals("")) | ||
{ | ||
prettyPrintIndentation(); | ||
writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name)); | ||
writeEscaped(value.trim()); | ||
writeRaw(CONTROLFIELD_CLOSE); | ||
prettyPrintNewLine(); | ||
} | ||
else if (!currentEntity.equals("leader")) | ||
{ | ||
prettyPrintIndentation(); | ||
writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name)); | ||
writeEscaped(value.trim()); | ||
writeRaw(SUBFIELD_CLOSE); | ||
prettyPrintNewLine(); | ||
} | ||
else { | ||
} | ||
|
||
@Override | ||
protected void onResetStream() { | ||
if (!atStreamStart) { | ||
writeFooter(); | ||
} | ||
sendAndClearData(); | ||
atStreamStart = true; | ||
} | ||
|
||
@Override | ||
protected void onCloseStream() { | ||
writeFooter(); | ||
sendAndClearData(); | ||
} | ||
|
||
/** Increments the indentation level by one */ | ||
private void incrementIndentationLevel() { | ||
indentationLevel += 1; | ||
} | ||
|
||
/** Decrements the indentation level by one */ | ||
private void decrementIndentationLevel() { | ||
indentationLevel -= 1; | ||
} | ||
|
||
/** Adds a XML Header */ | ||
private void writeHeader() { | ||
writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding)); | ||
} | ||
|
||
/** Closes the root tag */ | ||
private void writeFooter() { | ||
writeRaw(ROOT_CLOSE); | ||
} | ||
|
||
/** Writes a unescaped sequence */ | ||
private void writeRaw(final String str) { | ||
builder.append(str); | ||
} | ||
|
||
/** Writes a escaped sequence */ | ||
private void writeEscaped(final String str) { | ||
final int len = str.length(); | ||
for (int i = 0; i < len; ++i) { | ||
final char c = str.charAt(i); | ||
final String entityName; | ||
switch (c) { | ||
case '&': | ||
entityName = "amp"; | ||
break; | ||
case '<': | ||
entityName = "lt"; | ||
break; | ||
case '>': | ||
entityName = "gt"; | ||
break; | ||
case '\'': | ||
entityName = "apos"; | ||
break; | ||
case '"': | ||
entityName = "quot"; | ||
break; | ||
default: | ||
entityName = null; | ||
break; | ||
} | ||
|
||
if (entityName == null) { | ||
builder.append(c); | ||
} else { | ||
builder.append('&'); | ||
builder.append(entityName); | ||
builder.append(';'); | ||
} | ||
} | ||
} | ||
|
||
private void prettyPrintIndentation() { | ||
if (formatted) { | ||
String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT)); | ||
builder.append(prefix); | ||
} | ||
} | ||
|
||
private void prettyPrintNewLine() { | ||
if (formatted) { | ||
builder.append(NEW_LINE); | ||
} | ||
} | ||
|
||
private void sendAndClearData() { | ||
getReceiver().process(builder.toString()); | ||
builder.delete(0, builder.length()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
114 changes: 114 additions & 0 deletions
114
metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
package org.metafacture.biblio.marc21; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
import org.junit.After; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.metafacture.framework.helpers.DefaultObjectReceiver; | ||
|
||
public class MarcXmlEncoderTest { | ||
|
||
private StringBuilder resultCollector; | ||
private MarcXmlEncoder encoder; | ||
|
||
@Before | ||
public void setUp() throws Exception { | ||
encoder = new MarcXmlEncoder(); | ||
encoder.setFormatted(false); | ||
encoder.setReceiver(new DefaultObjectReceiver<String>() { | ||
@Override | ||
public void process(final String obj) { | ||
resultCollector.append(obj); | ||
} | ||
}); | ||
resultCollector = new StringBuilder(); | ||
} | ||
|
||
@After | ||
public void tearDown() throws Exception { | ||
} | ||
|
||
private void addOneRecord(MarcXmlEncoder encoder) { | ||
encoder.startRecord("92005291"); | ||
encoder.literal("001", "92005291"); | ||
encoder.startEntity("010 "); | ||
encoder.literal("a", "92005291"); | ||
encoder.endEntity(); | ||
encoder.endRecord(); | ||
} | ||
|
||
@Test | ||
public void doNotOmitXmlDeclaration() throws Exception { | ||
encoder.omitXmlDeclaration(false); | ||
addOneRecord(encoder); | ||
encoder.closeStream(); | ||
|
||
String actual = resultCollector.toString(); | ||
assertTrue(actual.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")); | ||
} | ||
|
||
@Test | ||
public void omitXmlDeclaration() throws Exception { | ||
encoder.omitXmlDeclaration(true); | ||
addOneRecord(encoder); | ||
encoder.closeStream(); | ||
String actual = resultCollector.toString(); | ||
assertTrue(actual.startsWith("<marc:collection")); | ||
assertTrue(actual.endsWith("</marc:collection>")); | ||
} | ||
|
||
@Test | ||
public void setXmlVersion() throws Exception { | ||
encoder.omitXmlDeclaration(false); | ||
encoder.setXmlVersion("1.1"); | ||
addOneRecord(encoder); | ||
encoder.closeStream(); | ||
|
||
String actual = resultCollector.toString(); | ||
assertTrue(actual.startsWith("<?xml version=\"1.1\" encoding=\"UTF-8\"?>")); | ||
} | ||
|
||
@Test | ||
public void setXmlEncoding() throws Exception { | ||
encoder.omitXmlDeclaration(false); | ||
encoder.setXmlEncoding("UTF-16"); | ||
addOneRecord(encoder); | ||
encoder.closeStream(); | ||
|
||
String actual = resultCollector.toString(); | ||
assertTrue(actual.startsWith("<?xml version=\"1.0\" encoding=\"UTF-16\"?>")); | ||
} | ||
|
||
@Test | ||
public void createAnEmptyRecord() throws Exception { | ||
encoder.startRecord("1"); | ||
encoder.endRecord(); | ||
encoder.closeStream(); | ||
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"><marc:record></marc:record></marc:collection>"; | ||
String actual = resultCollector.toString(); | ||
assertEquals(expected, actual); | ||
} | ||
|
||
@Test | ||
public void createARecord() throws Exception { | ||
addOneRecord(encoder); | ||
encoder.closeStream(); | ||
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"><marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record></marc:collection>"; | ||
String actual = resultCollector.toString(); | ||
assertEquals(expected, actual); | ||
} | ||
|
||
@Test | ||
public void createTwoRecordsInOneCollection() throws Exception { | ||
addOneRecord(encoder); | ||
addOneRecord(encoder); | ||
encoder.closeStream(); | ||
|
||
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"><marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record><marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record></marc:collection>"; | ||
String actual = resultCollector.toString(); | ||
|
||
assertEquals(expected, actual); | ||
} | ||
} |