Skip to content

Commit

Permalink
Merge pull request #114 from cboehme/issue-98
Browse files Browse the repository at this point in the history
Issue 98: SimpleXmlWriter should only call process once per record
  • Loading branch information
Markus M. Geipel committed Jul 26, 2013
2 parents db3bae2 + f264396 commit 7e8c13a
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 149 deletions.
280 changes: 149 additions & 131 deletions src/main/java/org/culturegraph/mf/stream/sink/SimpleXmlWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,91 +33,95 @@
import org.culturegraph.mf.util.ResourceUtil;

/**
*
*
* writes a stream to XML
*
* @author Markus Michael Geipel
*
*
* @author Markus Michael Geipel, Christoph Böhme
*
*/
@Description("writes a stream to xml")
@In(StreamReceiver.class)
@Out(String.class)
public final class SimpleXmlWriter extends DefaultStreamPipe<ObjectReceiver<String>> {

public static final String ATTRIBUTE_MARKER = "~";
// public static final String TEXT_CONTENT_MARKER = "_text";
public static final String NAMESPACES = "namespaces";
public static final String NEW_LINE = "\n";

private Element element;
public static final String DEFAULT_ROOT_TAG = "records";
public static final String DEFAULT_RECORD_TAG = "record";

private static final String NEW_LINE = "\n";
private static final String INDENT = "\t";

private static final String BEGIN_ATTRIBUTE = "=\"";
private static final String END_ATTRIBUTE = "\"";
private static final String BEGIN_OPEN_ELEMENT = "<";
private static final String END_OPEN_ELEMENT = ">";
private static final String END_EMPTY_ELEMENT = " />";
private static final String BEGIN_CLOSE_ELEMENT = "</";
private static final String END_CLOSE_ELEMENT = ">";

private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
private static final String XMLNS_MARKER = " xmlns:";

private final StringBuilder builder = new StringBuilder();

private String rootTag = DEFAULT_ROOT_TAG;
private String recordTag = DEFAULT_RECORD_TAG;
private Map<String, String> namespaces = new HashMap<String, String>();
private String recordTag = "record";
private String rootTag = "records";
private boolean start = true;
private boolean separateRoots;
private boolean writeXmlHeader = true;
private boolean separateRoots;

private Element element;
private boolean atStreamStart = true;

/**
 * Sets the tag name used for the root element that encloses the records.
 * The value is stored as given; no validation is performed here.
 *
 * @param rootTag name of the root element
 */
public void setRootTag(final String rootTag) {
this.rootTag = rootTag;
}

public void setWriteXmlHeader(final boolean writeXmlHeader) {
this.writeXmlHeader = writeXmlHeader;
}

public void setSeparateRoots(final boolean separateRoots) {
this.separateRoots = separateRoots;
public void setRecordTag(final String tag) {
recordTag = tag;
}

public void setNamespaceFile(final String file) {
final Properties properties = ResourceUtil.loadProperties(file);
for (Entry<Object, Object> entry : properties.entrySet()) {
for (final Entry<Object, Object> entry : properties.entrySet()) {
namespaces.put(entry.getKey().toString(), entry.getValue().toString());
}
}

private void writeHeader() {
final StringBuilder builder = new StringBuilder();
public void setWriteXmlHeader(final boolean writeXmlHeader) {
this.writeXmlHeader = writeXmlHeader;
}

if (writeXmlHeader) {
builder.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
}
public void setSeparateRoots(final boolean separateRoots) {
this.separateRoots = separateRoots;
}

builder.append("<");
builder.append(rootTag);
for (Entry<String, String> entry : namespaces.entrySet()) {
builder.append(" xmlns:");
builder.append(entry.getKey());
builder.append("=\"");
escape(builder, entry.getValue());
builder.append("\"");
}
builder.append(">");
getReceiver().process(builder.toString());
start = false;
public void configure(final MultiMap multimap) {
this.namespaces = multimap.getMap(NAMESPACES);
}

@Override
public void startRecord(final String identifier) {
if (separateRoots || start) {
if (separateRoots) {
writeHeader();
} else if (atStreamStart) {
writeHeader();
sendAndClearData();
}
atStreamStart = false;

element = new Element(recordTag);
}

@Override
public void endRecord() {
if (recordTag.isEmpty()) {
final StringBuilder builder = new StringBuilder();
for (Element child : element.getChildren()) {
child.writeToStringBuilder(builder, 1);
}
getReceiver().process(builder.toString());
} else {
getReceiver().process(element.toString());
}
element.writeElement(builder, 1);
if (separateRoots) {
writeFooter();
}
sendAndClearData();
}

@Override
Expand All @@ -137,42 +141,106 @@ public void literal(final String name, final String value) {
} else if (name.startsWith(ATTRIBUTE_MARKER)) {
element.addAttribute(name.substring(1), value);
} else {
final Element temp = element.createChild(name);
temp.setText(value);
element.createChild(name).setText(value);
}
}

public void configure(final MultiMap multimap) {
this.namespaces = multimap.getMap(NAMESPACES);
}

public void setRecordTag(final String tag) {
recordTag = tag;
/**
 * Finishes the current XML document when the stream is reset and prepares
 * the writer for a fresh document.
 *
 * <p>The closing root tag is only written when all records share a single
 * root element. With {@code separateRoots} enabled, {@code endRecord()} has
 * already closed and sent the root of every record, so writing another
 * footer here would emit a stray closing tag. This mirrors the guard used
 * in {@code onCloseStream()}.
 */
@Override
protected void onResetStream() {
    if (separateRoots) {
        // Every completed record was already sent as its own document.
        // Discard whatever an unfinished record may have buffered so it
        // cannot leak into the output produced after the reset.
        builder.delete(0, builder.length());
    } else {
        writeFooter();
        sendAndClearData();
    }
    // The next startRecord() has to begin with a new header.
    atStreamStart = true;
}

/**
 * Completes the output when the stream is closed.
 *
 * <p>Nothing is done when each record is wrapped in its own root element.
 * Otherwise the closing root tag is buffered and passed to the receiver.
 */
@Override
protected void onCloseStream() {
    if (separateRoots) {
        return;
    }
    writeFooter();
    sendAndClearData();
}

/**
 * Passes everything collected in the buffer to the receiver as a single
 * string and then empties the buffer.
 */
private void sendAndClearData() {
    final String pending = builder.toString();
    getReceiver().process(pending);
    // Only reached when process() returned normally, as before.
    builder.setLength(0);
}

/**
 * Appends the document header to the buffer: the XML declaration (only if
 * {@code writeXmlHeader} is set) followed by the opening root tag with one
 * {@code xmlns:} attribute per configured namespace. Namespace values are
 * escaped; nothing is sent to the receiver here.
 */
private void writeHeader() {
    if (writeXmlHeader) {
        builder.append(XML_HEADER);
    }

    builder.append(BEGIN_OPEN_ELEMENT).append(rootTag);
    for (final Entry<String, String> namespace : namespaces.entrySet()) {
        builder.append(XMLNS_MARKER)
                .append(namespace.getKey())
                .append(BEGIN_ATTRIBUTE);
        writeEscaped(builder, namespace.getValue());
        builder.append(END_ATTRIBUTE);
    }
    builder.append(END_OPEN_ELEMENT);
}

private void writeFooter() {
getReceiver().process("</" + rootTag + ">");
builder.append(NEW_LINE);
builder.append(BEGIN_CLOSE_ELEMENT);
builder.append(rootTag);
builder.append(END_CLOSE_ELEMENT);
}

/**
 * Appends {@code str} to {@code builder}, replacing the characters
 * {@code &}, {@code <}, {@code >}, {@code '} and {@code "} with the
 * entity references {@code &amp;}, {@code &lt;}, {@code &gt;},
 * {@code &apos;} and {@code &quot;}. All other characters are copied
 * unchanged.
 *
 * @param builder buffer that receives the escaped text
 * @param str text to escape; must not be {@code null}
 */
protected static void writeEscaped(final StringBuilder builder, final String str) {
    for (int pos = 0, end = str.length(); pos < end; ++pos) {
        final char ch = str.charAt(pos);
        switch (ch) {
        case '&':
            builder.append("&amp;");
            break;
        case '<':
            builder.append("&lt;");
            break;
        case '>':
            builder.append("&gt;");
            break;
        case '\'':
            builder.append("&apos;");
            break;
        case '"':
            builder.append("&quot;");
            break;
        default:
            builder.append(ch);
            break;
        }
    }
}

/**
* An XML element.
*
*/
private static final class Element {

private static final List<Element> NO_CHILDREN = Collections.emptyList();

private final StringBuilder attributes = new StringBuilder();
private String text = "";
private List<Element> children = NO_CHILDREN;
private final Element parent;
private final String name;

private String text = "";
private List<Element> children = NO_CHILDREN;

public Element(final String name) {
this.name = name;
this.parent = null;
Expand All @@ -183,16 +251,12 @@ private Element(final String name, final Element parent) {
this.parent = parent;
}

public List<Element> getChildren() {
return children;
}

public void addAttribute(final String name, final String value) {
attributes.append(" ");
attributes.append(name);
attributes.append("=\"");
escape(attributes, value);
attributes.append("\"");
attributes.append(BEGIN_ATTRIBUTE);
writeEscaped(attributes, value);
attributes.append(END_ATTRIBUTE);
}

public void setText(final String text) {
Expand All @@ -212,90 +276,44 @@ public Element getParent() {
return parent;
}

@Override
public String toString() {
final StringBuilder builder = new StringBuilder();
writeToStringBuilder(builder, 1);
return builder.toString();
}

public void writeToStringBuilder(final StringBuilder builder, final int indent) {
builder.append(NEW_LINE);
indent(builder, indent);
builder.append("<");
builder.append(name);
builder.append(attributes);
if (text.isEmpty() && children.isEmpty()) {
builder.append(" /");
public void writeElement(final StringBuilder builder, final int indent) {
if (!name.isEmpty()) {
builder.append(NEW_LINE);
writeIndent(builder, indent);
builder.append(BEGIN_OPEN_ELEMENT);
builder.append(name);
builder.append(attributes);
if (text.isEmpty() && children.isEmpty()) {
builder.append(END_EMPTY_ELEMENT);
return;
}
builder.append(END_OPEN_ELEMENT);
}

builder.append(">");
writeEscaped(builder, text);

escape(builder, text);

for (Element element : children) {
element.writeToStringBuilder(builder, indent + 1);
for (final Element element : children) {
element.writeElement(builder, indent + 1);
}

if (text.isEmpty() && !children.isEmpty()) {
builder.append(NEW_LINE);
indent(builder, indent);
writeIndent(builder, indent);
}

if (!text.isEmpty() || !children.isEmpty()) {
builder.append("</");
if (!name.isEmpty()) {
builder.append(BEGIN_CLOSE_ELEMENT);
builder.append(name);
builder.append(">");
builder.append(END_CLOSE_ELEMENT);
}
}

private static void indent(final StringBuilder builder, final int indent) {
private static void writeIndent(final StringBuilder builder, final int indent) {
for (int i = 0; i < indent; ++i) {
builder.append("\t");
builder.append(INDENT);
}
}
}

/**
 * Handles a stream reset: calls {@code writeFooter()} and then sets the
 * {@code start} flag again.
 */
@Override
protected void onResetStream() {
writeFooter();
// startRecord() checks this flag to decide whether a header must be written.
start = true;
}

/**
 * Copies {@code str} into {@code builder} while substituting entity
 * references for the characters {@code &}, {@code <}, {@code >},
 * {@code '} and {@code "} ({@code &amp;}, {@code &lt;}, {@code &gt;},
 * {@code &apos;}, {@code &quot;}). Every other character is appended as is.
 *
 * @param builder buffer that receives the escaped text
 * @param str text to escape; must not be {@code null}
 */
protected static void escape(final StringBuilder builder, final String str) {
    for (final char current : str.toCharArray()) {
        final String entity;
        if (current == '&') {
            entity = "amp";
        } else if (current == '<') {
            entity = "lt";
        } else if (current == '>') {
            entity = "gt";
        } else if (current == '\'') {
            entity = "apos";
        } else if (current == '"') {
            entity = "quot";
        } else {
            // Not a special character: copy it through unchanged.
            builder.append(current);
            continue;
        }
        builder.append('&').append(entity).append(';');
    }
}

}
Loading

0 comments on commit 7e8c13a

Please sign in to comment.