diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java
index 290f66e53..38ba469be 100644
--- a/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java
@@ -156,6 +156,15 @@ public String getRecordId() {
         return buffer.stringAt(dataStart, dataLength, charset);
     }
 
+    /**
+     * Returns the record label (the record leader in MARC 21 terms).
+     *
+     * @return the string representation of the record label.
+     */
+    public String getLabel() {
+        return label.toString();
+    }
+
     /**
      * Iterates through all fields in the record and calls the appropriate method
      * on the supplied {@link FieldHandler} instance.
diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java
index 437739b65..e52b54e4e 100644
--- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java
@@ -140,6 +140,7 @@ public final class Marc21Decoder
     private final FieldHandler fieldHandler = new Marc21Handler();
 
     private boolean ignoreMissingId;
+    private boolean emitLeaderAsWhole;
 
     /**
      * Controls whether the decoder aborts processing if a record has no
@@ -164,6 +165,29 @@ public boolean getIgnoreMissingId() {
         return ignoreMissingId;
     }
 
+    /**
+     * Controls whether the Record Leader should be emitted as a whole instead of
+     * extracting the bibliographic information in the record leader.
+     *
+     * @see <a href="https://www.loc.gov/marc/bibliographic/bdleader.html">MARC 21
+     *      Standard: Record Leader</a>
+     *
+     * @param emitLeaderAsWhole
+     *            true if the leader should be emitted as a whole.
+     */
+    public void setEmitLeaderAsWhole(final boolean emitLeaderAsWhole) {
+        this.emitLeaderAsWhole = emitLeaderAsWhole;
+    }
+
+    /**
+     * Returns whether the record leader is emitted as a whole.
+     *
+     * @return true if the leader is emitted as a whole.
+     */
+    public boolean getEmitLeaderAsWhole() {
+        return emitLeaderAsWhole;
+    }
+
     @Override
     public void process(final String obj) {
         if (obj.isEmpty()) {
@@ -207,9 +231,12 @@ private String tryGetRecordId(final Record record) {
     }
 
     private void emitLeader(final Record record) {
+        getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
+        if (emitLeaderAsWhole) {
+            getReceiver().literal(Marc21EventNames.LEADER_ENTITY, record.getLabel());
+        } else {
         final char[] implCodes = record.getImplCodes();
         final char[] systemChars = record.getSystemChars();
-        getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
         getReceiver().literal(Marc21EventNames.RECORD_STATUS_LITERAL, String.valueOf(
                 record.getRecordStatus()));
         getReceiver().literal(Marc21EventNames.RECORD_TYPE_LITERAL, String.valueOf(
@@ -226,6 +253,7 @@ private void emitLeader(final Record record) {
                 systemChars[Marc21Constants.CATALOGING_FORM_INDEX]));
         getReceiver().literal(Marc21EventNames.MULTIPART_LEVEL_LITERAL, String.valueOf(
                 systemChars[Marc21Constants.MULTIPART_LEVEL_INDEX]));
+        }
         getReceiver().endEntity();
     }
 
diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
new file mode 100644
index 000000000..330584dd4
--- /dev/null
+++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java
@@ -0,0 +1,283 @@
+package org.metafacture.biblio.marc21;
+
+import java.util.Collections;
+
+import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.ObjectReceiver;
+import org.metafacture.framework.StreamReceiver;
+import org.metafacture.framework.annotations.Description;
+import org.metafacture.framework.annotations.In;
+import org.metafacture.framework.annotations.Out;
+import org.metafacture.framework.helpers.DefaultStreamPipe;
+
+/**
+ * Encodes a stream into MARCXML.
+ *
+ * @author some Jan (Eberhardt) did almost all
+ * @author Pascal Christoph (dr0i) dug it up again
+ */
+
+@Description("Encodes a stream into MARCXML.")
+@In(StreamReceiver.class)
+@Out(String.class)
+@FluxCommand("encode-marcxml")
+public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
+    private static final String ROOT_OPEN = "";
+    private static final String ROOT_CLOSE = "";
+
+    private static final String RECORD_OPEN = "";
+    private static final String RECORD_CLOSE = "";
+
+    private static final String CONTROLFIELD_OPEN_TEMPLATE = "";
+    private static final String CONTROLFIELD_CLOSE = "";
+
+    private static final String DATAFIELD_OPEN_TEMPLATE = "";
+    private static final String DATAFIELD_CLOSE = "";
+
+    private static final String SUBFIELD_OPEN_TEMPLATE = "";
+    private static final String SUBFIELD_CLOSE = "";
+
+    private static final String LEADER_OPEN_TEMPLATE = "";
+    private static final String LEADER_CLOSE_TEMPLATE = "";
+
+    private static final String NEW_LINE = "\n";
+    private static final String INDENT = "\t";
+
+    private static final String XML_DECLARATION_TEMPLATE = "";
+
+    private final StringBuilder builder;
+
+    private boolean atStreamStart;
+
+    private boolean omitXmlDeclaration;
+    private String xmlVersion;
+    private String xmlEncoding;
+
+    private String currentEntity;
+    private int indentationLevel;
+    private boolean formatted;
+
+    public MarcXmlEncoder() {
+        this.builder = new StringBuilder();
+        this.atStreamStart = true;
+
+        this.omitXmlDeclaration = false;
+        this.xmlVersion = "1.0";
+        this.xmlEncoding = "UTF-8";
+
+        this.currentEntity = "";
+
+        this.indentationLevel = 0;
+        this.formatted = true;
+    }
+
+    /** Controls whether the XML declaration is left out at the start of the stream. */
+    public void omitXmlDeclaration(boolean omitXmlDeclaration) {
+        this.omitXmlDeclaration = omitXmlDeclaration;
+    }
+
+    /** Sets the version written into the XML declaration (initially "1.0"). */
+    public void setXmlVersion(String xmlVersion) {
+        this.xmlVersion = xmlVersion;
+    }
+
+    /** Sets the encoding written into the XML declaration (initially "UTF-8"). */
+    public void setXmlEncoding(String xmlEncoding) {
+        this.xmlEncoding = xmlEncoding;
+    }
+
+    /**
+     * Formats the resulting xml, by indentation.
+     *
+     * @param formatted
+     *            True, if formatting is activated.
+     */
+    public void setFormatted(boolean formatted) {
+        this.formatted = formatted;
+    }
+
+    @Override
+    public void startRecord(final String identifier) {
+        if (atStreamStart) {
+            if (!omitXmlDeclaration) {
+                writeHeader();
+                prettyPrintNewLine();
+            }
+            writeRaw(ROOT_OPEN);
+            prettyPrintNewLine();
+            incrementIndentationLevel();
+        }
+        atStreamStart = false;
+
+        prettyPrintIndentation();
+        writeRaw(RECORD_OPEN);
+        prettyPrintNewLine();
+
+        incrementIndentationLevel();
+    }
+
+    @Override
+    public void endRecord() {
+        decrementIndentationLevel();
+        prettyPrintIndentation();
+        writeRaw(RECORD_CLOSE);
+        prettyPrintNewLine();
+        sendAndClearData();
+    }
+
+    @Override
+    public void startEntity(final String name) {
+        currentEntity = name;
+        // The leader entity opens no datafield; its element is written by literal().
+        if (!name.equals(Marc21EventNames.LEADER_ENTITY)) {
+            if (name.length() != 5) {
+                String message = String.format("Invalid entity name. Got a string ('%s') of length %d. "
+                        + "Expected a length of 5 (field + indicators).", name, name.length());
+                throw new MetafactureException(message);
+            }
+
+            String tag = name.substring(0, 3);
+            String ind1 = name.substring(3, 4);
+            String ind2 = name.substring(4, 5);
+            prettyPrintIndentation();
+            writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2));
+            prettyPrintNewLine();
+            incrementIndentationLevel();
+        }
+    }
+
+    @Override
+    public void endEntity() {
+        if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) {
+            decrementIndentationLevel();
+            prettyPrintIndentation();
+            writeRaw(DATAFIELD_CLOSE);
+            prettyPrintNewLine();
+        }
+        currentEntity = "";
+    }
+
+    @Override
+    public void literal(final String name, final String value) {
+        if (currentEntity.equals("")) {
+            prettyPrintIndentation();
+            writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name));
+            writeEscaped(value.trim());
+            writeRaw(CONTROLFIELD_CLOSE);
+            prettyPrintNewLine();
+        } else if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) {
+            prettyPrintIndentation();
+            writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name));
+            writeEscaped(value.trim());
+            writeRaw(SUBFIELD_CLOSE);
+            prettyPrintNewLine();
+        } else {
+            // Inside the leader entity only the literal named like the entity is written.
+            if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
+                prettyPrintIndentation();
+                writeRaw(LEADER_OPEN_TEMPLATE);
+                writeEscaped(value);
+                writeRaw(LEADER_CLOSE_TEMPLATE);
+                prettyPrintNewLine();
+            }
+        }
+
+    }
+
+    @Override
+    protected void onResetStream() {
+        if (!atStreamStart) {
+            writeFooter();
+        }
+        sendAndClearData();
+        atStreamStart = true;
+        indentationLevel = 0;
+    }
+
+    @Override
+    protected void onCloseStream() {
+        writeFooter();
+        sendAndClearData();
+    }
+
+    /** Increments the indentation level by one */
+    private void incrementIndentationLevel() {
+        indentationLevel += 1;
+    }
+
+    /** Decrements the indentation level by one */
+    private void decrementIndentationLevel() {
+        indentationLevel -= 1;
+    }
+
+    /** Adds an XML header */
+    private void writeHeader() {
+        writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding));
+    }
+
+    /** Closes the root tag */
+    private void writeFooter() {
+        writeRaw(ROOT_CLOSE);
+    }
+
+    /** Writes an unescaped sequence */
+    private void writeRaw(final String str) {
+        builder.append(str);
+    }
+
+    /** Writes an escaped sequence */
+    private void writeEscaped(final String str) {
+        final int len = str.length();
+        for (int i = 0; i < len; ++i) {
+            final char c = str.charAt(i);
+            final String entityName;
+            switch (c) {
+            case '&':
+                entityName = "amp";
+                break;
+            case '<':
+                entityName = "lt";
+                break;
+            case '>':
+                entityName = "gt";
+                break;
+            case '\'':
+                entityName = "apos";
+                break;
+            case '"':
+                entityName = "quot";
+                break;
+            default:
+                entityName = null;
+                break;
+            }
+
+            if (entityName == null) {
+                builder.append(c);
+            } else {
+                builder.append('&');
+                builder.append(entityName);
+                builder.append(';');
+            }
+        }
+    }
+
+    private void prettyPrintIndentation() {
+        if (formatted) {
+            String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
+            builder.append(prefix);
+        }
+    }
+
+    private void prettyPrintNewLine() {
+        if (formatted) {
+            builder.append(NEW_LINE);
+        }
+    }
+
+    private void sendAndClearData() {
+        getReceiver().process(builder.toString());
+        builder.delete(0, builder.length());
+    }
+}
diff --git a/metafacture-biblio/src/main/resources/flux-commands.properties b/metafacture-biblio/src/main/resources/flux-commands.properties
index 91d97c488..25ce5d5bf 100644
--- a/metafacture-biblio/src/main/resources/flux-commands.properties
+++ b/metafacture-biblio/src/main/resources/flux-commands.properties
@@ -16,6 +16,7 @@
 decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder
 encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder
 handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler
+encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder
 
 decode-pica org.metafacture.biblio.pica.PicaDecoder
 encode-pica org.metafacture.biblio.pica.PicaEncoder
diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java
new file mode 100644
index 000000000..5c37374a6
--- /dev/null
+++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2019 Pascal Christoph (hbz)
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.metafacture.biblio.marc21;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.metafacture.framework.helpers.DefaultObjectReceiver;
+
+/**
+ * Tests for class {@link MarcXmlEncoder}.
+ *
+ * @author some Jan (Eberhardt) did almost all
+ * @author Pascal Christoph (dr0i) dug it up again
+ *
+ */
+
+public class MarcXmlEncoderTest {
+    private static StringBuilder resultCollector ;
+    private static MarcXmlEncoder encoder ;
+    private static final String XML_DECLARATION = "";
+    private static final String XML_1_DECLARATION = "";
+    private static final String XML_16_DECLARATION = "";
+    private static final String XML_ROOT_OPEN = "";
+    private static final String XML_RECORD = "92005291"
+            + "92005291"
+            + "";
+    private static final String XML_MARC_COLLECTION_END_TAG = "";
+    private static final String RECORD_ID = "92005291";
+
+    @Before
+    public void setUp() throws Exception {
+        encoder = new MarcXmlEncoder();
+        encoder.setFormatted(false);
+        encoder.setReceiver(new DefaultObjectReceiver<String>() {
+            @Override
+            public void process(final String obj) {
+                resultCollector.append(obj);
+            }
+        });
+        resultCollector= new StringBuilder();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+    }
+
+    private void addOneRecord(MarcXmlEncoder encoder) {
+        encoder.startRecord(RECORD_ID);
+        encoder.literal("001", RECORD_ID);
+        encoder.startEntity("010  ");
+        encoder.literal("a", RECORD_ID);
+        encoder.endEntity();
+        encoder.endRecord();
+    }
+
+    @Test
+    public void doNotOmitXmlDeclaration() throws Exception {
+        encoder.omitXmlDeclaration(false);
+        addOneRecord(encoder);
+        encoder.closeStream();
+
+        String actual = resultCollector.toString();
+        assertTrue(actual.startsWith(XML_DECLARATION));
+    }
+
+    @Test
+    public void omitXmlDeclaration() throws Exception {
+        encoder.omitXmlDeclaration(true);
+        addOneRecord(encoder);
+        encoder.closeStream();
+        String actual = resultCollector.toString();
+        assertTrue(actual.startsWith(""
+                + XML_MARC_COLLECTION_END_TAG;
+        String actual = resultCollector.toString();
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void createARecord() throws Exception {
+        addOneRecord(encoder);
+        encoder.closeStream();
+        String expected = XML_DECLARATION + XML_ROOT_OPEN + XML_RECORD + "";
+        String actual = resultCollector.toString();
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void createTwoRecordsInOneCollection() throws Exception {
+        addOneRecord(encoder);
+        addOneRecord(encoder);
+        encoder.closeStream();
+        String expected = XML_DECLARATION + XML_ROOT_OPEN + XML_RECORD + XML_RECORD + "";
+        String actual = resultCollector.toString();
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void createAnRecordWithLeader() throws Exception {
+        encoder.startRecord("1");
+        encoder.startEntity("leader");
+        encoder.literal("leader", "dummy");
+        encoder.endEntity();
+        encoder.endRecord();
+        encoder.closeStream();
+        String expected = XML_DECLARATION + XML_ROOT_OPEN + "dummy"
+                + XML_MARC_COLLECTION_END_TAG;
+        String actual = resultCollector.toString();
+        assertEquals(expected, actual);
+    }
+}
diff --git a/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java b/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java
index 588353d61..9fe465fca 100644
--- a/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java
+++ b/metafacture-strings/src/main/java/org/metafacture/strings/LineRecorder.java
@@ -1,5 +1,4 @@
-/*
- * Copyright 2019 hbz
+/* Copyright 2019 Pascal Christoph (hbz)
  *
  * Licensed under the Apache License, Version 2.0 the "License";
  * you may not use this file except in compliance with the License.
@@ -13,46 +12,77 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.metafacture.strings;
 
 import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.ObjectPipe;
 import org.metafacture.framework.ObjectReceiver;
 import org.metafacture.framework.annotations.Description;
 import org.metafacture.framework.annotations.In;
 import org.metafacture.framework.annotations.Out;
-import org.metafacture.framework.helpers.DefaultObjectPipe;
 
 /**
  * Collects strings and emits them as records when a line matches the pattern.
- * Appends to every incoming line a line feed so that the original structure is
- * preserved.
+ * Appends to every incoming line a line feed so that the original structure
+ * is preserved.
  *
  * @author Pascal Christoph (dr0i).
  *
  */
-@Description("Collects strings and emits them as records when a line matches the pattern.")
+@Description("Collects strings and emits them as records when a"
+        + " line matches the pattern or the stream is closed.")
 @In(String.class)
 @Out(String.class)
 @FluxCommand("lines-to-records")
 public final class LineRecorder
-        extends DefaultObjectPipe<String, ObjectReceiver<String>> {
+        implements ObjectPipe<String, ObjectReceiver<String>> {
+
+    private static final int SB_CAPACITY = 4096 * 7;
+    // empty line is the default
+    private String recordMarkerRegexp = "^\\s*$";
+    StringBuilder record = new StringBuilder(SB_CAPACITY);
+    private ObjectReceiver<String> receiver;
+
+    /** Sets the regular expression a line must match to end the current record. */
+    public void setRecordMarkerRegexp(final String regexp) {
+        recordMarkerRegexp = regexp;
+    }
+
+    @Override
+    public void process(final String line) {
+        if (line.matches(recordMarkerRegexp)) {
+            getReceiver().process(record.toString());
+            record = new StringBuilder(SB_CAPACITY);
+        } else {
+            record.append(line).append('\n');
+        }
+    }
 
-    private final int SB_CAPACITY = 4096 * 7;
-    private String recordMarkerRegexp = "^\\s*$"; // empty line is default
-    StringBuilder record = new StringBuilder(SB_CAPACITY);
+    @Override
+    public void resetStream() {
+        record = new StringBuilder(SB_CAPACITY);
+    }
 
-    public void setRecordMarkerRegexp(final String regexp) {
-        this.recordMarkerRegexp = regexp;
+    @Override
+    public void closeStream() {
+        // Emit the lines collected since the last marker as a final record.
+        getReceiver().process(record.toString());
     }
 
     @Override
-    public void process(final String line) {
-        assert !isClosed();
-        if (line.matches(recordMarkerRegexp)) {
-            getReceiver().process(record.toString());
-            record = new StringBuilder(SB_CAPACITY);
-        } else
-            record.append(line + "\n");
+    public <R extends ObjectReceiver<String>> R setReceiver(R receiver) {
+        this.receiver = receiver;
+        return receiver;
+    }
+
+    /**
+     * Returns a reference to the downstream module.
+     *
+     * @return reference to the downstream module
+     */
+    protected final ObjectReceiver<String> getReceiver() {
+        return receiver;
     }
 
 }
diff --git a/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java b/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java
index 7573b58de..eefc21208 100644
--- a/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java
+++ b/metafacture-strings/src/test/java/org/metafacture/strings/LineRecorderTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2019 Pascal Christoph
+ * Copyright 2019 Pascal Christoph (hbz)
  *
  * Licensed under the Apache License, Version 2.0 the "License";
  * you may not use this file except in compliance with the License.
@@ -99,4 +99,18 @@ public void shouldEmitRecordWithNonDefaultRecordMarker() {
         ordered.verifyNoMoreInteractions();
     }
 
+    @Test
+    public void shouldEmitLastRecordWithoutRecordMarkerWhenClosingStream() {
+        lineRecorder.process(RECORD3_PART1);
+        lineRecorder.process(RECORD3_PART2);
+        lineRecorder.closeStream();
+        final InOrder ordered = inOrder(receiver);
+        ordered.verify(receiver).process(
+                RECORD3_PART1 +
+                LINE_SEPARATOR +
+                RECORD3_PART2 +
+                LINE_SEPARATOR);
+        ordered.verifyNoMoreInteractions();
+    }
+
 }