Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of XML attributes and element values. #394

Merged
merged 7 commits into from
Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions metafacture-biblio/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,10 @@ dependencies {
testImplementation 'junit:junit:4.12'
testImplementation 'org.mockito:mockito-core:2.5.5'
}

test {
testLogging {
showStandardStreams = true
exceptionFormat = 'full'
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,17 @@
@FluxCommand("handle-marcxml")
public final class MarcXmlHandler extends DefaultXmlPipe<StreamReceiver> {

public static final String DEFAULT_ATTRIBUTE_MARKER = "";

private static final String SUBFIELD = "subfield";
private static final String DATAFIELD = "datafield";
private static final String CONTROLFIELD = "controlfield";
private static final String RECORD = "record";
private static final String NAMESPACE = "http://www.loc.gov/MARC21/slim";
private static final String LEADER = "leader";
private static final String TYPE = "type";

private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
private String currentTag = "";
private String namespace = NAMESPACE;
private StringBuilder builder = new StringBuilder();
Expand All @@ -60,6 +64,14 @@ private boolean checkNamespace(final String uri) {
return namespace == null || namespace.equals(uri);
}

public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
if (SUBFIELD.equals(localName)) {
Expand All @@ -75,7 +87,7 @@ else if (CONTROLFIELD.equals(localName)) {
}
else if (RECORD.equals(localName) && checkNamespace(uri)) {
getReceiver().startRecord("");
getReceiver().literal(TYPE, attributes.getValue(TYPE));
getReceiver().literal(attributeMarker + TYPE, attributes.getValue(TYPE));
}
else if (LEADER.equals(localName)) {
builder = new StringBuilder();
Expand All @@ -87,18 +99,15 @@ else if (LEADER.equals(localName)) {
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
if (SUBFIELD.equals(localName)) {
getReceiver().literal(currentTag, builder.toString().trim());

}
else if (DATAFIELD.equals(localName)) {
getReceiver().endEntity();
}
else if (CONTROLFIELD.equals(localName)) {
getReceiver().literal(currentTag, builder.toString().trim());

}
else if (RECORD.equals(localName) && checkNamespace(uri)) {
getReceiver().endRecord();

}
else if (LEADER.equals(localName)) {
getReceiver().literal(currentTag, builder.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import org.junit.Before;
import org.junit.Test;
import org.metafacture.framework.StreamReceiver;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
Expand Down Expand Up @@ -130,4 +132,39 @@ public void issue330ShouldOptionallyRecognizeRecordsWithoutNamespace()
verifyNoMoreInteractions(receiver);
}

@Test
public void shouldNotEncodeTypeAttributeAsMarkedLiteral() throws SAXException {
final AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic");

marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes);
marcXmlHandler.endElement(NAMESPACE, RECORD, "");

final InOrder ordered = Mockito.inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).literal(TYPE, "bibliographic");
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
verifyNoMoreInteractions(receiver);
}

@Test
public void issue336_shouldEncodeTypeAttributeAsLiteralWithConfiguredMarker() throws SAXException {
final String marker = "~";
marcXmlHandler.setAttributeMarker(marker);

final AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic");

marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes);
marcXmlHandler.endElement(NAMESPACE, RECORD, "");

final InOrder ordered = Mockito.inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).literal(marker + TYPE, "bibliographic");
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
verifyNoMoreInteractions(receiver);
}

}
7 changes: 7 additions & 0 deletions metafacture-xml/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,10 @@ dependencies {
testImplementation 'org.mockito:mockito-core:2.5.5'
testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21'
}

test {
testLogging {
showStandardStreams = true
exceptionFormat = 'full'
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,19 @@
@FluxCommand("handle-generic-xml")
public final class GenericXmlHandler extends DefaultXmlPipe<StreamReceiver> {

public static final String DEFAULT_ATTRIBUTE_MARKER = "";

public static final String DEFAULT_RECORD_TAG = "record";

public static final String DEFAULT_VALUE_TAG = "value";

public static final boolean EMIT_NAMESPACE = false;

private static final Pattern TABS = Pattern.compile("\t+");

private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
private String recordTagName = DEFAULT_RECORD_TAG;
private String valueTagName = DEFAULT_VALUE_TAG;

private boolean inRecord;
private StringBuilder valueBuffer = new StringBuilder();
Expand Down Expand Up @@ -92,6 +98,14 @@ public String getRecordTagName() {
return recordTagName;
}

public void setValueTagName(final String valueTagName) {
this.valueTagName = valueTagName;
}

public String getValueTagName() {
return valueTagName;
}

/**
* Triggers namespace awareness. If set to "true" input data like "foo:bar"
* will be passed through as "foo:bar". For backward compatibility the default
Expand All @@ -110,6 +124,14 @@ public boolean getEmitNamespace() {
return this.emitNamespace;
}

public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
if (inRecord) {
Expand Down Expand Up @@ -159,7 +181,7 @@ public void characters(final char[] chars, final int start, final int length) {
private void writeValue() {
final String value = valueBuffer.toString();
if (!value.trim().isEmpty()) {
getReceiver().literal("value", value.replace('\n', ' '));
getReceiver().literal(valueTagName, value.replace('\n', ' '));
}
valueBuffer = new StringBuilder();
}
Expand All @@ -170,7 +192,7 @@ private void writeAttributes(final Attributes attributes) {
for (int i = 0; i < length; ++i) {
final String name = emitNamespace ? attributes.getQName(i) : attributes.getLocalName(i);
final String value = attributes.getValue(i);
getReceiver().literal(name, value);
getReceiver().literal(attributeMarker + name, value);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Str

public static final String DEFAULT_ROOT_TAG = "records";
public static final String DEFAULT_RECORD_TAG = "record";
public static final String DEFAULT_VALUE_TAG = null;

private static final String NEW_LINE = "\n";
private static final String INDENT = "\t";
Expand All @@ -72,8 +73,10 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Str

private final StringBuilder builder = new StringBuilder();

private String attributeMarker = ATTRIBUTE_MARKER;
private String rootTag = DEFAULT_ROOT_TAG;
private String recordTag = DEFAULT_RECORD_TAG;
private String valueTag = DEFAULT_VALUE_TAG;
private Map<String, String> namespaces = new HashMap<String, String>();
private boolean writeRootTag = true;
private boolean writeXmlHeader = true;
Expand All @@ -96,6 +99,14 @@ public void setRecordTag(final String tag) {
recordTag = tag;
}

public void setValueTag(final String valueTag) {
this.valueTag = valueTag;
}

public String getValueTag() {
return valueTag;
}

public void setNamespaceFile(final String file) {
final Properties properties;
try {
Expand Down Expand Up @@ -146,6 +157,14 @@ public void setNamespaces(final Map<String, String> namespaces) {
this.namespaces = namespaces;
}

public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startRecord(final String identifier) {
if (separateRoots) {
Expand Down Expand Up @@ -192,11 +211,11 @@ public void endEntity() {

@Override
public void literal(final String name, final String value) {
if (name.isEmpty()) {
if (name.isEmpty() || name.equals(valueTag)) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also set DEFAULT_VALUE_TAG = "" and drop name.isEmpty().

Copy link
Member

@dr0i dr0i Sep 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 simplify if you can ... did that as sideeffect of d9ba206.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't actually change the behaviour due to SimpleXmlEncoder.Element.writeElement(). But I think we should do it anyway, so we get our semantics straight.

element.setText(value);
}
else if (name.startsWith(ATTRIBUTE_MARKER)) {
element.addAttribute(name.substring(1), value);
else if (name.startsWith(attributeMarker)) {
element.addAttribute(name.substring(attributeMarker.length()), value);
}
else {
element.createChild(name).setText(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.metafacture.framework.StreamReceiver;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
import org.xml.sax.helpers.AttributesImpl;

Expand Down Expand Up @@ -132,6 +133,28 @@ public void shouldEmitPCDataAsALiteralNamedValue() {
ordered.verify(receiver).literal("value", "char-data");
}

@Test
public void shouldEmitPCDataAsALiteralWithConfiguredValueTagName() {
final String name = "data";
genericXmlHandler.setValueTagName(name);

final char[] charData = "char-data".toCharArray();
genericXmlHandler.startElement("", "record", "record", attributes);
genericXmlHandler.startElement("", "entity", "entity", attributes);
genericXmlHandler.characters(charData, 0, charData.length);
genericXmlHandler.endElement("", "entity", "entity");
genericXmlHandler.endElement("", "record", "record");

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).startEntity("entity");
ordered.verify(receiver).literal(name, "char-data");
ordered.verify(receiver).endEntity();
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
Mockito.verifyNoMoreInteractions(receiver);
}

@Test
public void shouldEmitNamespaceOnEntityElementAndAttribute() {
genericXmlHandler.setEmitNamespace(true);
Expand All @@ -141,6 +164,47 @@ public void shouldEmitNamespaceOnEntityElementAndAttribute() {

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startEntity("ns:entity");
ordered.verify(receiver).literal("ns:attr","attr-value");
ordered.verify(receiver).literal("ns:attr", "attr-value");
}

@Test
public void shouldNotEncodeAttributesAsMarkedLiterals() {
attributes.addAttribute("", "attr", "attr", "CDATA", "attr-value");
genericXmlHandler.startElement("", "record", "record", attributes);
genericXmlHandler.endElement("", "record", "record");

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).literal("attr", "attr-value");
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
Mockito.verifyNoMoreInteractions(receiver);
}

@Test
public void issue379_shouldEncodeAttributesAsLiteralsWithConfiguredMarker() {
final String marker = "~";
genericXmlHandler.setAttributeMarker(marker);

genericXmlHandler.startElement("", "record", "record", attributes);
attributes.addAttribute("", "authority", "authority", "CDATA", "marcrelator");
attributes.addAttribute("", "type", "type", "CDATA", "text");
genericXmlHandler.startElement("", "roleTerm", "roleTerm", attributes);
final char[] charData = "Author".toCharArray();
genericXmlHandler.characters(charData, 0, charData.length);
genericXmlHandler.endElement("", "roleTerm", "roleTerm");
genericXmlHandler.endElement("", "record", "record");

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).startEntity("roleTerm");
ordered.verify(receiver).literal(marker + "authority", "marcrelator");
ordered.verify(receiver).literal(marker + "type", "text");
ordered.verify(receiver).literal("value", "Author");
ordered.verify(receiver).endEntity();
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
Mockito.verifyNoMoreInteractions(receiver);
}

}
Loading