Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

300 add marc xml encoder #302

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ public String getRecordId() {
return buffer.stringAt(dataStart, dataLength, charset);
}

/**
* Returns the record label, i.e. the record leader, as a string.
* <p>
* The value is the string representation of this record's {@code label}
* field; no further processing is applied here.
*
* @return a string which is the record leader.
*/
public String getLabel() {
return label.toString();
}
/**
* Iterates through all fields in the record and calls the appropriate method
* on the supplied {@link FieldHandler} instance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ public final class Marc21Decoder
private final FieldHandler fieldHandler = new Marc21Handler();

private boolean ignoreMissingId;
private boolean emitLeaderAsWhole;

/**
* Controls whether the decoder aborts processing if a record has no
Expand All @@ -164,6 +165,24 @@ public boolean getIgnoreMissingId() {
return ignoreMissingId;
}

/**
* Controls whether the Record Leader should be emitted as a whole instead of
* extracting the bibliographic information in the record leader.
* <p>
* When enabled, the leader entity contains a single literal named
* {@link Marc21EventNames#LEADER_ENTITY} whose value is the complete record
* label. When disabled, the leader entity contains individual literals
* (record status, record type, ..., multipart level) taken from the
* corresponding leader positions.
*
* @see <a href="http://www.loc.gov/marc/bibliographic/bdleader.html">MARC 21
* Standard: Record Leader</a>
*
* @param emitLeaderAsWhole
* true if the leader should be emitted as a whole.
*/
public void setEmitLeaderAsWhole(final boolean emitLeaderAsWhole) {
this.emitLeaderAsWhole = emitLeaderAsWhole;
}

/**
* Reports whether the record leader is emitted as a single literal instead
* of being split into its individual elements.
*
* @return true if the leader is emitted as a whole.
* @see #setEmitLeaderAsWhole(boolean)
*/
public boolean getEmitLeaderAsWhole() {
return emitLeaderAsWhole;
}

@Override
public void process(final String obj) {
if (obj.isEmpty()) {
Expand Down Expand Up @@ -207,9 +226,12 @@ private String tryGetRecordId(final Record record) {
}

private void emitLeader(final Record record) {
getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
if (emitLeaderAsWhole){
getReceiver().literal(Marc21EventNames.LEADER_ENTITY, record.getLabel());
}else {
final char[] implCodes = record.getImplCodes();
final char[] systemChars = record.getSystemChars();
getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
getReceiver().literal(Marc21EventNames.RECORD_STATUS_LITERAL, String.valueOf(
record.getRecordStatus()));
getReceiver().literal(Marc21EventNames.RECORD_TYPE_LITERAL, String.valueOf(
Expand All @@ -226,6 +248,7 @@ private void emitLeader(final Record record) {
systemChars[Marc21Constants.CATALOGING_FORM_INDEX]));
getReceiver().literal(Marc21EventNames.MULTIPART_LEVEL_LITERAL, String.valueOf(
systemChars[Marc21Constants.MULTIPART_LEVEL_INDEX]));
}
getReceiver().endEntity();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
package org.metafacture.biblio.marc21;

import java.util.Collections;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;

/**
* Encodes a stream into MARCXML.
*
* @author some Jan (Eberhardt) did almost all
* @author Pascal Christoph (dr0i) dug it up again
*/

@Description("Encodes a stream into MARCXML.")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("encode-marc21")
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
private static final String ROOT_CLOSE = "</marc:collection>";

private static final String RECORD_OPEN = "<marc:record>";
private static final String RECORD_CLOSE = "</marc:record>";

private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\"%s\">";
private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>";

private static final String DATAFIELD_OPEN_TEMPLATE = "<marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">";
private static final String DATAFIELD_CLOSE = "</marc:datafield>";

private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\"%s\">";
private static final String SUBFIELD_CLOSE = "</marc:subfield>";

private static final String LEADER_OPEN_TEMPLATE = "<marc:leader>";
private static final String LEADER_CLOSE_TEMPLATE = "</marc:leader>";

private static final String NEW_LINE = "\n";
private static final String INDENT = "\t";

private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\"%s\" encoding=\"%s\"?>";

private final StringBuilder builder;

private boolean atStreamStart;

private boolean omitXmlDeclaration;
private String xmlVersion;
private String xmlEncoding;

private String currentEntity;
private int indentationLevel;
private boolean formatted;

public MarcXmlEncoder() {
this.builder = new StringBuilder();
this.atStreamStart = true;

this.omitXmlDeclaration = false;
this.xmlVersion = "1.0";
this.xmlEncoding = "UTF-8";

this.currentEntity = "";

this.indentationLevel = 0;
this.formatted = true;
}

public void omitXmlDeclaration(boolean omitXmlDeclaration) {
this.omitXmlDeclaration = omitXmlDeclaration;
}

public void setXmlVersion(String xmlVersion) {
this.xmlVersion = xmlVersion;
}

public void setXmlEncoding(String xmlEncoding) {
this.xmlEncoding = xmlEncoding;
}

/**
* Formats the resulting xml, by indentation.
*
* @param formatted
* True, if formatting is activated.
*/
public void setFormatted(boolean formatted) {
this.formatted = formatted;
}

@Override
public void startRecord(final String identifier) {
if (atStreamStart) {
if (!omitXmlDeclaration) {
writeHeader();
prettyPrintNewLine();
}
writeRaw(ROOT_OPEN);
prettyPrintNewLine();
incrementIndentationLevel();
}
atStreamStart = false;

prettyPrintIndentation();
writeRaw(RECORD_OPEN);
prettyPrintNewLine();

incrementIndentationLevel();
}

@Override
public void endRecord() {
decrementIndentationLevel();
prettyPrintIndentation();
writeRaw(RECORD_CLOSE);
prettyPrintNewLine();
sendAndClearData();
}

@Override
public void startEntity(final String name) {
currentEntity = name;
if (!name.equals("leader")) {
if (name.length() != 5) {
String message = String.format("Entity too short." + "Got a string ('%s') of length %d."
+ "Expected a length of 5 (field + indicators).", name, name.length());
throw new MetafactureException(message);
}

String tag = name.substring(0, 3);
String ind1 = name.substring(3, 4);
String ind2 = name.substring(4, 5);
prettyPrintIndentation();
writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2));
prettyPrintNewLine();
incrementIndentationLevel();
}
}

@Override
public void endEntity() {
if (!currentEntity.equals("leader")) {
decrementIndentationLevel();
prettyPrintIndentation();
writeRaw(DATAFIELD_CLOSE);
prettyPrintNewLine();
}
currentEntity = "";
}

@Override
public void literal(final String name, final String value) {
if (currentEntity.equals("")) {
prettyPrintIndentation();
writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name));
writeEscaped(value.trim());
writeRaw(CONTROLFIELD_CLOSE);
prettyPrintNewLine();
} else if (!currentEntity.equals("leader")) {
prettyPrintIndentation();
writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name));
writeEscaped(value.trim());
writeRaw(SUBFIELD_CLOSE);
prettyPrintNewLine();
} else {
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
prettyPrintIndentation();
writeRaw(LEADER_OPEN_TEMPLATE + value + LEADER_CLOSE_TEMPLATE);
prettyPrintNewLine();
}
}

}

@Override
protected void onResetStream() {
if (!atStreamStart) {
writeFooter();
}
sendAndClearData();
atStreamStart = true;
}

@Override
protected void onCloseStream() {
writeFooter();
sendAndClearData();
}

/** Increments the indentation level by one */
private void incrementIndentationLevel() {
indentationLevel += 1;
}

/** Decrements the indentation level by one */
private void decrementIndentationLevel() {
indentationLevel -= 1;
}

/** Adds a XML Header */
private void writeHeader() {
writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding));
}

/** Closes the root tag */
private void writeFooter() {
writeRaw(ROOT_CLOSE);
}

/** Writes a unescaped sequence */
private void writeRaw(final String str) {
builder.append(str);
}

/** Writes a escaped sequence */
private void writeEscaped(final String str) {
final int len = str.length();
for (int i = 0; i < len; ++i) {
final char c = str.charAt(i);
final String entityName;
switch (c) {
case '&':
entityName = "amp";
break;
case '<':
entityName = "lt";
break;
case '>':
entityName = "gt";
break;
case '\'':
entityName = "apos";
break;
case '"':
entityName = "quot";
break;
default:
entityName = null;
break;
}

if (entityName == null) {
builder.append(c);
} else {
builder.append('&');
builder.append(entityName);
builder.append(';');
}
}
}

private void prettyPrintIndentation() {
if (formatted) {
String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
builder.append(prefix);
}
}

private void prettyPrintNewLine() {
if (formatted) {
builder.append(NEW_LINE);
}
}

private void sendAndClearData() {
getReceiver().process(builder.toString());
builder.delete(0, builder.length());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder
encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder
handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler
encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder

decode-pica org.metafacture.biblio.pica.PicaDecoder
encode-pica org.metafacture.biblio.pica.PicaEncoder
Expand Down
Loading