Skip to content

Commit be6885b

Browse files
Remove special chars from xml output
--------- Co-authored-by: Michael Osipov <1983-01-06@gmx.net> (cherry picked from commit c9d72af)
1 parent 4274b2e commit be6885b

File tree

4 files changed

+93
-19
lines changed

4 files changed

+93
-19
lines changed

src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java

+20-5
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
import java.util.regex.Pattern;
2424

2525
/**
26-
* Implementation of XMLWriter which emits nicely formatted documents.
27-
*
26+
* <p>Implementation of XMLWriter which emits nicely formatted documents.</p>
2827
*
28+
* <p>C0 control characters except <code>\n</code>, <code>\r</code>, and <code>\t</code> are omitted from output.</p>
2929
*/
3030
public class PrettyPrintXMLWriter implements XMLWriter {
3131
/** Line separator ("\n" on UNIX) */
@@ -185,7 +185,7 @@ private void writeText(String text, boolean escapeXml) {
185185
finishTag();
186186

187187
if (escapeXml) {
188-
text = escapeXml(text);
188+
text = escapeXmlText(text);
189189
}
190190

191191
write(StringUtils.unifyLineSeparators(text, lineSeparator));
@@ -225,10 +225,12 @@ private static String escapeXml(String text) {
225225

226226
private static final Pattern crlf = Pattern.compile(crlf_str);
227227

228-
private static final Pattern lowers = Pattern.compile("([\000-\037])");
228+
private static final Pattern lowers = Pattern.compile("([\\x00-\\x1F])");
229+
230+
private static final Pattern illegalC0Characters = Pattern.compile("([\\x00-\\x08\\x0B-\\x0C\\x0E-\\x1F])");
229231

230232
private static String escapeXmlAttribute(String text) {
231-
text = escapeXml(text);
233+
text = escapeXmlText(text);
232234

233235
// Windows
234236
Matcher crlfmatcher = crlf.matcher(text);
@@ -246,6 +248,19 @@ private static String escapeXmlAttribute(String text) {
246248
return b.toString();
247249
}
248250

251+
private static String escapeXmlText(String text) {
252+
text = escapeXml(text);
253+
254+
Matcher matcher = illegalC0Characters.matcher(text);
255+
StringBuffer b = new StringBuffer();
256+
while (matcher.find()) {
257+
matcher = matcher.appendReplacement(b, "");
258+
}
259+
matcher.appendTail(b);
260+
261+
return b.toString();
262+
}
263+
249264
/** {@inheritDoc} */
250265
@Override
251266
public void addAttribute(String key, String value) {

src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java

+4-13
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
* <li>PROPERTY_SERIALIZER_INDENTATION
2525
* <li>PROPERTY_SERIALIZER_LINE_SEPARATOR
2626
* </ul>
27+
* <p>C0 control characters except <code>\n</code>, <code>\r</code>, and <code>\t</code> are omitted from output.</p>
2728
*/
2829
public class MXSerializer implements XmlSerializer {
2930
protected static final String XML_URI = "http://www.w3.org/XML/1998/namespace";
@@ -943,19 +944,9 @@ protected void writeElementContent(String text, Writer out) throws IOException {
943944
// out.write(';');
944945
// pos = i + 1;
945946
} else {
946-
throw new IllegalStateException(
947-
"character " + Integer.toString(ch) + " is not allowed in output" + getLocation());
948-
// in XML 1.1 legal are [#x1-#xD7FF]
949-
// if(ch > 0) {
950-
// if(i > pos) out.write(text.substring(pos, i));
951-
// out.write("&#");
952-
// out.write(Integer.toString(ch));
953-
// out.write(';');
954-
// pos = i + 1;
955-
// } else {
956-
// throw new IllegalStateException(
957-
// "character zero is not allowed in XML 1.1 output"+getLocation());
958-
// }
947+
// skip special char
948+
if (i > pos) out.write(text.substring(pos, i));
949+
pos = i + 1;
959950
}
960951
}
961952
if (seenBracket) {

src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ private String createExpectedXML(boolean escape) {
8282
buf.append(LS);
8383
buf.append(" </el6>");
8484
buf.append(LS);
85+
if (escape) {
86+
buf.append(" <el8>special-char-</el8>");
87+
} else {
88+
buf.append(" <el8>special-char-" + (char) 7 + "</el8>");
89+
}
90+
buf.append(LS);
8591
buf.append("</root>");
8692

8793
return buf.toString();
@@ -95,7 +101,7 @@ private Xpp3Dom createXpp3Dom() {
95101
dom.addChild(el1);
96102

97103
Xpp3Dom el2 = new Xpp3Dom("el2");
98-
el2.setAttribute("att2", "attribute2\nnextline");
104+
el2.setAttribute("att2", "attribute2\nnextline" + (char) 7);
99105
dom.addChild(el2);
100106

101107
Xpp3Dom el3 = new Xpp3Dom("el3");
@@ -119,6 +125,10 @@ private Xpp3Dom createXpp3Dom() {
119125
el7.setValue("element7\n&\"\'<>");
120126
el6.addChild(el7);
121127

128+
Xpp3Dom el8 = new Xpp3Dom("el8");
129+
el8.setValue("special-char-" + (char) 7);
130+
131+
dom.addChild(el8);
122132
return dom;
123133
}
124134
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package org.codehaus.plexus.util.xml.pull;
2+
3+
import java.io.StringReader;
4+
import java.io.StringWriter;
5+
import java.util.Arrays;
6+
7+
import org.junit.jupiter.api.Test;
8+
9+
import static org.junit.jupiter.api.Assertions.assertEquals;
10+
11+
class MXSerializerTest {
12+
13+
@Test
14+
void testSerialize() throws Exception {
15+
16+
StringWriter writer = new StringWriter();
17+
18+
MXSerializer sr = new MXSerializer();
19+
sr.setOutput(writer);
20+
21+
sr.startDocument(null, Boolean.TRUE);
22+
sr.startTag(null, "root");
23+
for (int i : Arrays.asList(8, 9, 10, 11, 13, 15)) {
24+
sr.startTag(null, "char");
25+
sr.text(Character.getName(i) + ": " + ((char) i));
26+
sr.endTag(null, "char");
27+
}
28+
29+
sr.endTag(null, "root");
30+
sr.endDocument();
31+
assertEquals(expectedOutput(), writer.toString());
32+
}
33+
34+
@Test
35+
void testDeserialize() throws Exception {
36+
MXParser parser = new MXParser();
37+
parser.setInput(new StringReader(expectedOutput()));
38+
int eventType = parser.getEventType();
39+
40+
while (eventType != XmlPullParser.END_DOCUMENT) {
41+
eventType = parser.next();
42+
}
43+
}
44+
45+
private String expectedOutput() {
46+
StringBuilder out = new StringBuilder();
47+
out.append("<?xml version=\"1.0\" standalone=\"yes\"?>");
48+
out.append("<root>");
49+
out.append("<char>BACKSPACE: </char>");
50+
out.append("<char>CHARACTER TABULATION: \t</char>");
51+
out.append("<char>LINE FEED (LF): \n</char>");
52+
out.append("<char>LINE TABULATION: </char>");
53+
out.append("<char>CARRIAGE RETURN (CR): \r</char>");
54+
out.append("<char>SHIFT IN: </char>");
55+
out.append("</root>");
56+
return out.toString();
57+
}
58+
}

0 commit comments

Comments
 (0)