|  | 
|  | 1 | +/** | 
|  | 2 | + * OWASP Enterprise Security API (ESAPI) | 
|  | 3 | + * | 
|  | 4 | + * This file is part of the Open Web Application Security Project (OWASP) | 
|  | 5 | + * Enterprise Security API (ESAPI) project. For details, please see | 
|  | 6 | + * <a href="http://www.owasp.org/index.php/ESAPI">http://www.owasp.org/index.php/ESAPI</a>. | 
|  | 7 | + * | 
|  | 8 | + * Copyright (c) 2022 - The OWASP Foundation | 
|  | 9 | + * | 
|  | 10 | + * The ESAPI is published by OWASP under the BSD license. You should read and accept the | 
|  | 11 | + * LICENSE before you use, modify, and/or redistribute this software. | 
|  | 12 | + * | 
|  | 13 | + * @author Jeffrey Walton (noloader .at. gmail.com) | 
|  | 14 | + * @author Kevin Wall (kevin.w.wall .at. gmail.com) | 
|  | 15 | + * @author Matt Seil (matt.seil .at. owasp.org) | 
|  | 16 | + * @created 2022 | 
|  | 17 | + */ | 
|  | 18 | +package org.owasp.esapi.codecs; | 
|  | 19 | + | 
|  | 20 | +/** | 
|  | 21 | + * Implementation of the Codec interface for JSON strings. | 
|  | 22 | + * This class performs <a | 
|  | 23 | + * href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">String escaping</a> | 
|  | 24 | + * on the entire string according to RFC 8259, Section 7. | 
|  | 25 | + * | 
|  | 26 | + * RFC 8259 requires conforming implementations use UTF-8. However, the ESAPI interfaces | 
|  | 27 | + * utilize Java strings, which are UTF-16. This may cause problems during encoding and | 
|  | 28 | + * decoding operations. To avoid some of the problems, convert the string to UTF-8 before | 
|  | 29 | + * encoding and from UTF-8 after decoding. Ultimately the ESAPI encoder interfaces will | 
|  | 30 | + * need modification to provide byte array arguments and return values. | 
|  | 31 | + * | 
|  | 32 | + * @see <a href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">RFC 8259, | 
|  | 33 | + * The JavaScript Object Notation (JSON) Data Interchange Format, Section 7</a> | 
|  | 34 | + * | 
|  | 35 | + * @author Jeffrey Walton (noloader .at. gmail.com) | 
|  | 36 | + * @author Kevin Wall (kevin.w.wall .at. gmail.com) | 
|  | 37 | + * @author Matt Seil (matt.seil .at. owasp.org) | 
|  | 38 | + * @since July 31, 2022 | 
|  | 39 | + * @see org.owasp.esapi.Encoder | 
|  | 40 | + */ | 
|  | 41 | +public class JSONCodec extends AbstractIntegerCodec { | 
|  | 42 | + | 
|  | 43 | + | 
|  | 44 | +    /** | 
|  | 45 | +     * {@inheritDoc} | 
|  | 46 | +     * | 
|  | 47 | +     * Escape special characters in JSON strings. | 
|  | 48 | +     * | 
|  | 49 | +     * encodeCharacter will escape the characters Backspace (\b), Form Feed (\f), | 
|  | 50 | +     * Carriage Return (\r), Line Feed (\n), Tab (\t), Double Quote (") and Backslash (\). | 
|  | 51 | +     * If the character is a control character (U+0000 through U+001f), then it will be | 
|  | 52 | +     * Unicode encoded (\u0000 through \u001f). If the character is not special or in the | 
|  | 53 | +     * user supplied immune list, then the character is returned unescaped. If the | 
|  | 54 | +     * character is null then an empty string is returned. | 
|  | 55 | +     * | 
|  | 56 | +     * @param immune character array of whitelist characters which should not be encoded | 
|  | 57 | +     * @param c the character to encode if not in the immune list | 
|  | 58 | +     * @return encoded character if the character is special, and the character otherwise. | 
|  | 59 | +     */ | 
|  | 60 | +    public String encodeCharacter( char[] immune, Character c ) { | 
|  | 61 | +        if ( c == null ) { | 
|  | 62 | +            return ""; | 
|  | 63 | +        } | 
|  | 64 | + | 
|  | 65 | +        return encodeCharacter(immune, charToCodepoint( c )); | 
|  | 66 | +    } | 
|  | 67 | + | 
|  | 68 | +    /** | 
|  | 69 | +     * {@inheritDoc} | 
|  | 70 | +     * | 
|  | 71 | +     * Escape special characters in JSON strings. | 
|  | 72 | +     * | 
|  | 73 | +     * encodeCharacter will escape the characters Backspace (\b), Form Feed (\f), | 
|  | 74 | +     * Carriage Return (\r), Line Feed (\n), Tab (\t), Double Quote (") and Backslash (\). | 
|  | 75 | +     * If the character is a control character (U+0000 through U+001f), then it will be | 
|  | 76 | +     * Unicode encoded (\u0000 through \u001f). If the character is not special or in the | 
|  | 77 | +     * user supplied immune list, then the character is returned unescaped. If the | 
|  | 78 | +     * character is null then an empty string is returned. | 
|  | 79 | +     * | 
|  | 80 | +     * @param immune character array of whitelist characters which should not be encoded | 
|  | 81 | +     * @param c the character to encode if not in the immune list | 
|  | 82 | +     * @return encoded character if the character is special, and the character otherwise. | 
|  | 83 | +     */ | 
|  | 84 | +    public String encodeCharacter( char[] immune, int codePoint ) | 
|  | 85 | +        throws IllegalArgumentException { | 
|  | 86 | + | 
|  | 87 | +        if ( Character.isValidCodePoint(codePoint) == false ) { | 
|  | 88 | +            throw new IllegalArgumentException( "Invalid codepoint '" + codePoint + "'." ); | 
|  | 89 | +        } | 
|  | 90 | + | 
|  | 91 | +        if ( immune != null ) { | 
|  | 92 | +            // More efficient than sort and binary search. If the immune array | 
|  | 93 | +            // was presorted, then this could be O(log n). But we can't add the | 
|  | 94 | +            // precondition now. It is too late in the game. | 
|  | 95 | +            for ( Character ch : immune ) { | 
|  | 96 | +                if ( charToCodepoint( ch ) == codePoint ) { | 
|  | 97 | +                    return new String(Character.toChars(codePoint)); | 
|  | 98 | +                } | 
|  | 99 | +            } | 
|  | 100 | +        } | 
|  | 101 | + | 
|  | 102 | +        // Per the RFC... Two-character sequence escape representations of some | 
|  | 103 | +        // popular characters | 
|  | 104 | +        switch ( codePoint ) { | 
|  | 105 | +            case '\b': return "\\b"; | 
|  | 106 | +            case '\f': return "\\f"; | 
|  | 107 | +            case '\r': return "\\r"; | 
|  | 108 | +            case '\n': return "\\n"; | 
|  | 109 | +            case '\t': return "\\t"; | 
|  | 110 | +            case '"':  return "\\\""; | 
|  | 111 | +            case '/':  return  "\\/"; | 
|  | 112 | +            case '\\': return "\\\\"; | 
|  | 113 | +        } | 
|  | 114 | + | 
|  | 115 | +        // Per the RFC... All Unicode characters may be placed within the | 
|  | 116 | +        // quotation marks, except for the characters that MUST be escaped: | 
|  | 117 | +        // quotation mark, reverse solidus, and the control characters | 
|  | 118 | +        // (U+0000 through U+001F). | 
|  | 119 | +        if ( codePoint <=  0x1f ) { | 
|  | 120 | + | 
|  | 121 | +            return String.format("\\u%04x", codePoint); | 
|  | 122 | +        } | 
|  | 123 | + | 
|  | 124 | +        return new String(Character.toChars(codePoint)); | 
|  | 125 | +    } | 
|  | 126 | + | 
|  | 127 | + | 
|  | 128 | +    /** | 
|  | 129 | +     * {@inheritDoc} | 
|  | 130 | +     * | 
|  | 131 | +     * Decodes special characters in encoded JSON strings. | 
|  | 132 | +     * | 
|  | 133 | +     * decodeCharacter will decode the encoded character sequences for popular characters | 
|  | 134 | +     * Backspace (\b), Form Feed (\f), Carriage Return (\r), Line Feed (\n), Tab (\t), | 
|  | 135 | +     * Double Quote ("), Forward slash (/) and Backslash (\). The function will also decode | 
|  | 136 | +     * six-character sequences of \u0000 - \uffff. If the character is not encoded then a | 
|  | 137 | +     * null character is returned. | 
|  | 138 | +     * | 
|  | 139 | +     * decodeCharacter does not handle all Unicode codepoints properly. If a codepoint is | 
|  | 140 | +     * encountered with a surrogate pair, then null is returned. This will happen with | 
|  | 141 | +     * codepoints greater than 64k. In this case we need to return two characters, not one. | 
|  | 142 | +     * | 
|  | 143 | +     * @param input a character sequence to decode | 
|  | 144 | +     * @return the decoded version of the encoded character starting at index, | 
|  | 145 | +     *     or null otherwise | 
|  | 146 | +     * | 
|  | 147 | +     * @throws IllegalArgumentException | 
|  | 148 | +     *     if an invalid character sequence is encountered | 
|  | 149 | +     */ | 
|  | 150 | +    public Integer decodeCharacter( PushbackSequence<Integer> input ) | 
|  | 151 | +        throws IllegalArgumentException { | 
|  | 152 | + | 
|  | 153 | +        input.mark(); | 
|  | 154 | + | 
|  | 155 | +        Integer first = input.next(), second = null; | 
|  | 156 | +        if ( first == null || first.intValue() != '\\' ) { | 
|  | 157 | +            input.reset(); | 
|  | 158 | +            return null; | 
|  | 159 | +        } | 
|  | 160 | + | 
|  | 161 | +        String errorMessage = null; | 
|  | 162 | + | 
|  | 163 | +        try | 
|  | 164 | +        { | 
|  | 165 | +            errorMessage = "Invalid JSON escape representation"; | 
|  | 166 | + | 
|  | 167 | +            if ( (second = input.next()) == null ) { | 
|  | 168 | +                throw new IllegalArgumentException(); | 
|  | 169 | +            } | 
|  | 170 | + | 
|  | 171 | +            // Per the RFC... Two-character sequence escape representations of some popular characters | 
|  | 172 | +            switch ( second.intValue() ) { | 
|  | 173 | +                case 'b': return (int)'\b'; | 
|  | 174 | +                case 'f': return (int)'\f'; | 
|  | 175 | +                case 'r': return (int)'\r'; | 
|  | 176 | +                case 'n': return (int)'\n'; | 
|  | 177 | +                case 't': return (int)'\t'; | 
|  | 178 | +                case '"': return (int)'\"'; | 
|  | 179 | +                case '/': return  (int)'/'; | 
|  | 180 | +                case '\\': return (int)'\\'; | 
|  | 181 | +            } | 
|  | 182 | + | 
|  | 183 | +            errorMessage = "Invalid JSON two-character escape representation"; | 
|  | 184 | + | 
|  | 185 | +            // Per the RFC... All characters may be escaped as a six-character sequence: a reverse solidus, | 
|  | 186 | +            // followed by the lowercase letter u, followed by four hexadecimal digits that encode the | 
|  | 187 | +            // character's code point. The hexadecimal letters A through F can be uppercase or lowercase. | 
|  | 188 | +            // So, for example, a string containing only a single reverse solidus character may be represented | 
|  | 189 | +            // as "\u005C". | 
|  | 190 | +            if ( second.intValue() == 'u' ) { | 
|  | 191 | + | 
|  | 192 | +                errorMessage = "Invalid JSON six-character escape representation"; | 
|  | 193 | + | 
|  | 194 | +                return (convertToInt( input.next() ) << 12) + | 
|  | 195 | +                       (convertToInt( input.next() ) <<  8) + | 
|  | 196 | +                       (convertToInt( input.next() ) <<  4) + | 
|  | 197 | +                       (convertToInt( input.next() ) <<  0); | 
|  | 198 | +            } | 
|  | 199 | + | 
|  | 200 | +            // Do nothing. Fall into throw below. | 
|  | 201 | +        } | 
|  | 202 | +        catch (IllegalArgumentException e) | 
|  | 203 | +        { | 
|  | 204 | +            // Do nothing. Fall into throw below. | 
|  | 205 | +        } | 
|  | 206 | + | 
|  | 207 | +        // Catch all. The escaped character sequence was invalid. | 
|  | 208 | +        input.reset(); | 
|  | 209 | +        throw new IllegalArgumentException( errorMessage ); | 
|  | 210 | +    } | 
|  | 211 | + | 
|  | 212 | +    protected int charToCodepoint( Character ch ) { | 
|  | 213 | + | 
|  | 214 | +        final String s = Character.toString(ch); | 
|  | 215 | +        assert (s.length() == 1) : "Ooops"; | 
|  | 216 | + | 
|  | 217 | +        return s.codePointAt(0); | 
|  | 218 | +    } | 
|  | 219 | + | 
|  | 220 | +    protected int convertToInt( Integer hexDigit ) { | 
|  | 221 | + | 
|  | 222 | +        if ( hexDigit == null ) { | 
|  | 223 | +            throw new IllegalArgumentException( "Cannot convert from '<null>' to int." ); | 
|  | 224 | +        } | 
|  | 225 | + | 
|  | 226 | +        final int value = Character.digit( hexDigit.intValue(), 16 ); | 
|  | 227 | + | 
|  | 228 | +        if ( value < 0 || value >= 16 ) { | 
|  | 229 | +            throw new IllegalArgumentException( "Cannot convert from hexadecimal '" + hexDigit.toString() + "' to int." ); | 
|  | 230 | +        } | 
|  | 231 | + | 
|  | 232 | +        return value; | 
|  | 233 | +    } | 
|  | 234 | + | 
|  | 235 | +} | 
0 commit comments