Skip to content

Commit b37787e

Browse files
committed
Initial JSON encoding support check-in
1 parent 3489790 commit b37787e

File tree

4 files changed

+677
-103
lines changed

4 files changed

+677
-103
lines changed

src/main/java/org/owasp/esapi/Encoder.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
* <li>JavaScript Escaping</li>
4646
* <li>MySQL Database Escaping</li>
4747
* <li>Oracle Database Escaping</li>
48+
* <li>JSON Escaping</li>
4849
* <li>Percent Encoding (aka URL Encoding)</li>
4950
* <li>Unix Shell Escaping</li>
5051
* <li>VBScript Escaping</li>
@@ -556,6 +557,22 @@ public interface Encoder {
556557
*/
557558
String encodeForURL(String input) throws EncodingException;
558559

560+
/**
561+
* Encode data for use in JSON strings. This method performs <a
562+
* href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">String escaping</a>
563+
* on the entire string according to RFC 8259, Section 7.
564+
*
565+
* @see <a href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">RFC 8259,
566+
* The JavaScript Object Notation (JSON) Data Interchange Format, Section 7</a>
567+
*
568+
* @param input
569+
* the text to escape for use in a JSON string
570+
*
571+
* @return input
572+
* escaped for use in a JSON string
573+
*/
574+
String encodeForJSON(String input);
575+
559576
/**
560577
* Decode from URL. Implementations should first canonicalize and
561578
* detect any double-encoding. If this check passes, then the data is decoded using URL
@@ -607,4 +624,19 @@ public interface Encoder {
607624
*/
608625
String getCanonicalizedURI(URI dirtyUri);
609626

627+
/**
628+
* Decode data encoded for JSON strings. This method removes <a
629+
* href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">String escaping</a>
630+
* on the entire string according to RFC 8259, Section 7.
631+
*
632+
* @see <a href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">RFC 8259,
633+
* The JavaScript Object Notation (JSON) Data Interchange Format, Section 7</a>
634+
*
635+
* @param input
636+
* the JSON string to decode
637+
*
638+
* @return input
639+
* decoded from JSON string
640+
*/
641+
String decodeFromJSON(String input);
610642
}
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
/**
2+
* OWASP Enterprise Security API (ESAPI)
3+
*
4+
* This file is part of the Open Web Application Security Project (OWASP)
5+
* Enterprise Security API (ESAPI) project. For details, please see
6+
* <a href="http://www.owasp.org/index.php/ESAPI">http://www.owasp.org/index.php/ESAPI</a>.
7+
*
8+
* Copyright (c) 2022 - The OWASP Foundation
9+
*
10+
* The ESAPI is published by OWASP under the BSD license. You should read and accept the
11+
* LICENSE before you use, modify, and/or redistribute this software.
12+
*
13+
* @author Jeffrey Walton (noloader .at. gmail.com)
14+
* @author Kevin Wall (kevin.w.wall .at. gmail.com)
15+
* @author Matt Seil (matt.seil .at. owasp.org)
16+
* @created 2022
17+
*/
18+
package org.owasp.esapi.codecs;
19+
20+
/**
21+
* Implementation of the Codec interface for JSON strings.
22+
* This class performs <a
23+
* href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">String escaping</a>
24+
* on the entire string according to RFC 8259, Section 7.
25+
*
26+
* RFC 8259 requires conforming implementations use UTF-8. However, the ESAPI interfaces
27+
* utilize Java strings, which are UTF-16. This may cause problems during encoding and
28+
* decoding operations. To avoid some of the problems, convert the string to UTF-8 before
29+
* encoding and from UTF-8 after decoding. Ultimately the ESAPI encoder interfaces will
30+
* need modification to provide byte array arguments and return values.
31+
*
32+
* @see <a href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">RFC 8259,
33+
* The JavaScript Object Notation (JSON) Data Interchange Format, Section 7</a>
34+
*
35+
* @author Jeffrey Walton (noloader .at. gmail.com)
36+
* @author Kevin Wall (kevin.w.wall .at. gmail.com)
37+
* @author Matt Seil (matt.seil .at. owasp.org)
38+
* @since July 31, 2022
39+
* @see org.owasp.esapi.Encoder
40+
*/
41+
public class JSONCodec extends AbstractIntegerCodec {
42+
43+
44+
/**
45+
* {@inheritDoc}
46+
*
47+
* Escape special characters in JSON strings.
48+
*
49+
* encodeCharacter will escape the characters Backspace (\b), Form Feed (\f),
50+
* Carriage Return (\r), Line Feed (\n), Tab (\t), Double Quote (") and Backslash (\).
51+
* If the character is a control character (U+0000 through U+001f), then it will be
52+
* Unicode encoded (\u0000 through \u001f). If the character is not special or in the
53+
* user supplied immune list, then the character is returned unescaped. If the
54+
* character is null then an empty string is returned.
55+
*
56+
* @param immune character array of whitelist characters which should not be encoded
57+
* @param c the character to encode if not in the immune list
58+
* @return encoded character if the character is special, and the character otherwise.
59+
*/
60+
public String encodeCharacter( char[] immune, Character c ) {
61+
if ( c == null ) {
62+
return "";
63+
}
64+
65+
return encodeCharacter(immune, charToCodepoint( c ));
66+
}
67+
68+
/**
69+
* {@inheritDoc}
70+
*
71+
* Escape special characters in JSON strings.
72+
*
73+
* encodeCharacter will escape the characters Backspace (\b), Form Feed (\f),
74+
* Carriage Return (\r), Line Feed (\n), Tab (\t), Double Quote (") and Backslash (\).
75+
* If the character is a control character (U+0000 through U+001f), then it will be
76+
* Unicode encoded (\u0000 through \u001f). If the character is not special or in the
77+
* user supplied immune list, then the character is returned unescaped. If the
78+
* character is null then an empty string is returned.
79+
*
80+
* @param immune character array of whitelist characters which should not be encoded
81+
* @param c the character to encode if not in the immune list
82+
* @return encoded character if the character is special, and the character otherwise.
83+
*/
84+
public String encodeCharacter( char[] immune, int codePoint )
85+
throws IllegalArgumentException {
86+
87+
if ( Character.isValidCodePoint(codePoint) == false ) {
88+
throw new IllegalArgumentException( "Invalid codepoint '" + codePoint + "'." );
89+
}
90+
91+
if ( immune != null ) {
92+
// More efficient than sort and binary search. If the immune array
93+
// was presorted, then this could be O(log n). But we can't add the
94+
// precondition now. It is too late in the game.
95+
for ( Character ch : immune ) {
96+
if ( charToCodepoint( ch ) == codePoint ) {
97+
return new String(Character.toChars(codePoint));
98+
}
99+
}
100+
}
101+
102+
// Per the RFC... Two-character sequence escape representations of some
103+
// popular characters
104+
switch ( codePoint ) {
105+
case '\b': return "\\b";
106+
case '\f': return "\\f";
107+
case '\r': return "\\r";
108+
case '\n': return "\\n";
109+
case '\t': return "\\t";
110+
case '"': return "\\\"";
111+
case '/': return "\\/";
112+
case '\\': return "\\\\";
113+
}
114+
115+
// Per the RFC... All Unicode characters may be placed within the
116+
// quotation marks, except for the characters that MUST be escaped:
117+
// quotation mark, reverse solidus, and the control characters
118+
// (U+0000 through U+001F).
119+
if ( codePoint <= 0x1f ) {
120+
121+
return String.format("\\u%04x", codePoint);
122+
}
123+
124+
return new String(Character.toChars(codePoint));
125+
}
126+
127+
128+
/**
129+
* {@inheritDoc}
130+
*
131+
* Decodes special characters in encoded JSON strings.
132+
*
133+
* decodeCharacter will decode the encoded character sequences for popular characters
134+
* Backspace (\b), Form Feed (\f), Carriage Return (\r), Line Feed (\n), Tab (\t),
135+
* Double Quote ("), Forward slash (/) and Backslash (\). The function will also decode
136+
* six-character sequences of \u0000 - \uffff. If the character is not encoded then a
137+
* null character is returned.
138+
*
139+
* decodeCharacter does not handle all Unicode codepoints properly. If a codepoint is
140+
* encountered with a surrogate pair, then null is returned. This will happen with
141+
* codepoints greater than 64k. In this case we need to return two characters, not one.
142+
*
143+
* @param input a character sequence to decode
144+
* @return the decoded version of the encoded character starting at index,
145+
* or null otherwise
146+
*
147+
* @throws IllegalArgumentException
148+
* if an invalid character sequence is encountered
149+
*/
150+
public Integer decodeCharacter( PushbackSequence<Integer> input )
151+
throws IllegalArgumentException {
152+
153+
input.mark();
154+
155+
Integer first = input.next(), second = null;
156+
if ( first == null || first.intValue() != '\\' ) {
157+
input.reset();
158+
return null;
159+
}
160+
161+
String errorMessage = null;
162+
163+
try
164+
{
165+
errorMessage = "Invalid JSON escape representation";
166+
167+
if ( (second = input.next()) == null ) {
168+
throw new IllegalArgumentException();
169+
}
170+
171+
// Per the RFC... Two-character sequence escape representations of some popular characters
172+
switch ( second.intValue() ) {
173+
case 'b': return (int)'\b';
174+
case 'f': return (int)'\f';
175+
case 'r': return (int)'\r';
176+
case 'n': return (int)'\n';
177+
case 't': return (int)'\t';
178+
case '"': return (int)'\"';
179+
case '/': return (int)'/';
180+
case '\\': return (int)'\\';
181+
}
182+
183+
errorMessage = "Invalid JSON two-character escape representation";
184+
185+
// Per the RFC... All characters may be escaped as a six-character sequence: a reverse solidus,
186+
// followed by the lowercase letter u, followed by four hexadecimal digits that encode the
187+
// character's code point. The hexadecimal letters A through F can be uppercase or lowercase.
188+
// So, for example, a string containing only a single reverse solidus character may be represented
189+
// as "\u005C".
190+
if ( second.intValue() == 'u' ) {
191+
192+
errorMessage = "Invalid JSON six-character escape representation";
193+
194+
return (convertToInt( input.next() ) << 12) +
195+
(convertToInt( input.next() ) << 8) +
196+
(convertToInt( input.next() ) << 4) +
197+
(convertToInt( input.next() ) << 0);
198+
}
199+
200+
// Do nothing. Fall into throw below.
201+
}
202+
catch (IllegalArgumentException e)
203+
{
204+
// Do nothing. Fall into throw below.
205+
}
206+
207+
// Catch all. The escaped character sequence was invalid.
208+
input.reset();
209+
throw new IllegalArgumentException( errorMessage );
210+
}
211+
212+
protected int charToCodepoint( Character ch ) {
213+
214+
final String s = Character.toString(ch);
215+
assert (s.length() == 1) : "Ooops";
216+
217+
return s.codePointAt(0);
218+
}
219+
220+
protected int convertToInt( Integer hexDigit ) {
221+
222+
if ( hexDigit == null ) {
223+
throw new IllegalArgumentException( "Cannot convert from '<null>' to int." );
224+
}
225+
226+
final int value = Character.digit( hexDigit.intValue(), 16 );
227+
228+
if ( value < 0 || value >= 16 ) {
229+
throw new IllegalArgumentException( "Cannot convert from hexadecimal '" + hexDigit.toString() + "' to int." );
230+
}
231+
232+
return value;
233+
}
234+
235+
}

src/main/java/org/owasp/esapi/reference/DefaultEncoder.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.owasp.esapi.codecs.PercentCodec;
4343
import org.owasp.esapi.codecs.VBScriptCodec;
4444
import org.owasp.esapi.codecs.XMLEntityCodec;
45+
import org.owasp.esapi.codecs.JSONCodec;
4546
import org.owasp.esapi.errors.EncodingException;
4647
import org.owasp.esapi.errors.IntrusionException;
4748

@@ -79,14 +80,15 @@ public static Encoder getInstance() {
7980
private JavaScriptCodec javaScriptCodec = new JavaScriptCodec();
8081
private VBScriptCodec vbScriptCodec = new VBScriptCodec();
8182
private CSSCodec cssCodec = new CSSCodec();
83+
private JSONCodec jsonCodec = new JSONCodec();
8284

8385
private final Logger logger = ESAPI.getLogger("Encoder");
8486

8587
/**
8688
* Character sets that define characters (in addition to alphanumerics) that are
8789
* immune from encoding in various formats
8890
*/
89-
private final static char[] IMMUNE_HTML = { ',', '.', '-', '_', ' ' };
91+
private final static char[] IMMUNE_HTML = { ',', '.', '-', '_', ' ' };
9092
private final static char[] IMMUNE_HTMLATTR = { ',', '.', '-', '_' };
9193
private final static char[] IMMUNE_CSS = { '#' };
9294
private final static char[] IMMUNE_JAVASCRIPT = { ',', '.', '_' };
@@ -96,6 +98,7 @@ public static Encoder getInstance() {
9698
private final static char[] IMMUNE_OS = { '-' };
9799
private final static char[] IMMUNE_XMLATTR = { ',', '.', '-', '_' };
98100
private final static char[] IMMUNE_XPATH = { ',', '.', '-', '_', ' ' };
101+
private final static char[] IMMUNE_JSON = { };
99102

100103

101104
/**
@@ -630,4 +633,25 @@ public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingE
630633
}
631634
return query_pairs;
632635
}
636+
637+
/**
638+
* {@inheritDoc}
639+
*/
640+
public String encodeForJSON(String input) {
641+
if( input == null ) {
642+
return null;
643+
}
644+
return jsonCodec.encode(IMMUNE_JSON, input);
645+
}
646+
647+
/**
648+
* {@inheritDoc}
649+
*/
650+
public String decodeFromJSON(String input) {
651+
if( input == null ) {
652+
return null;
653+
}
654+
return jsonCodec.decode(input);
655+
}
656+
633657
}

0 commit comments

Comments
 (0)