-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
219b88e
commit 54220ed
Showing
3 changed files
with
285 additions
and
1 deletion.
There are no files selected for viewing
131 changes: 131 additions & 0 deletions
131
tool-testsuite/test/org/antlr/v4/test/tool/TestEscapeSequenceParsing.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
/* | ||
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. | ||
* Use of this file is governed by the BSD 3-clause license that | ||
* can be found in the LICENSE.txt file in the project root. | ||
*/ | ||
|
||
package org.antlr.v4.test.tool; | ||
|
||
import org.antlr.v4.misc.EscapeSequenceParsing; | ||
import org.antlr.v4.runtime.misc.IntervalSet; | ||
|
||
import org.junit.Test; | ||
|
||
import static org.antlr.v4.misc.EscapeSequenceParsing.Result; | ||
import static org.junit.Assert.assertEquals; | ||
|
||
public class TestEscapeSequenceParsing { | ||
@Test | ||
public void testParseEmpty() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseJustBackslash() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseInvalidEscape() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\z", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseNewline() { | ||
assertEquals( | ||
new Result(Result.Type.INTERVAL_SET, IntervalSet.of('\n'), 2), | ||
EscapeSequenceParsing.parseEscape("\\n", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeTooShort() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\uABC", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeBMP() { | ||
assertEquals( | ||
new Result(Result.Type.INTERVAL_SET, IntervalSet.of(0xABCD), 6), | ||
EscapeSequenceParsing.parseEscape("\\uABCD", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeSMPTooShort() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\u{}", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeSMPMissingCloseBrace() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\u{12345", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeTooBig() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\u{110000}", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeSMP() { | ||
assertEquals( | ||
new Result(Result.Type.INTERVAL_SET, IntervalSet.of(0x10ABCD), 10), | ||
EscapeSequenceParsing.parseEscape("\\u{10ABCD}", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodePropertyTooShort() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\p{}", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodePropertyMissingCloseBrace() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\p{1234", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodeProperty() { | ||
assertEquals( | ||
new Result(Result.Type.INTERVAL_SET, IntervalSet.of(66560, 66639), 11), | ||
EscapeSequenceParsing.parseEscape("\\p{Deseret}", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodePropertyInvertedTooShort() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\P{}", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodePropertyInvertedMissingCloseBrace() { | ||
assertEquals( | ||
EscapeSequenceParsing.Result.INVALID, | ||
EscapeSequenceParsing.parseEscape("\\P{Deseret", 0)); | ||
} | ||
|
||
@Test | ||
public void testParseUnicodePropertyInverted() { | ||
IntervalSet expected = IntervalSet.of(0, 66559); | ||
expected.add(66640, Character.MAX_CODE_POINT); | ||
assertEquals( | ||
new Result(Result.Type.INTERVAL_SET, expected, 11), | ||
EscapeSequenceParsing.parseEscape("\\P{Deseret}", 0)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/* | ||
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. | ||
* Use of this file is governed by the BSD 3-clause license that | ||
* can be found in the LICENSE.txt file in the project root. | ||
*/ | ||
|
||
package org.antlr.v4.misc; | ||
|
||
import java.util.Objects; | ||
|
||
import org.antlr.v4.runtime.Token; | ||
import org.antlr.v4.runtime.misc.IntervalSet; | ||
import org.antlr.v4.unicode.UnicodeData; | ||
|
||
/** | ||
* Utility class to parse escapes like: | ||
* \\n | ||
* \\uABCD | ||
* \\u{10ABCD} | ||
* \\p{Foo} | ||
* \\P{Bar} | ||
*/ | ||
public abstract class EscapeSequenceParsing { | ||
public static class Result { | ||
public enum Type { | ||
INVALID, | ||
INTERVAL_SET | ||
}; | ||
|
||
public static Result INVALID = new Result(Type.INVALID, IntervalSet.EMPTY_SET, -1); | ||
|
||
public final Type type; | ||
public final IntervalSet intervalSet; | ||
public final int parseLength; | ||
|
||
public Result(Type type, IntervalSet intervalSet, int parseLength) { | ||
this.type = type; | ||
this.intervalSet = intervalSet; | ||
this.parseLength = parseLength; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return String.format( | ||
"%s type=%s intervalSet=%s parseLength=%d", | ||
super.toString(), | ||
type, | ||
intervalSet, | ||
parseLength); | ||
} | ||
|
||
@Override | ||
public boolean equals(Object other) { | ||
if (!(other instanceof Result)) { | ||
return false; | ||
} | ||
Result that = (Result) other; | ||
if (this == that) { | ||
return true; | ||
} | ||
return Objects.equals(this.type, that.type) && | ||
Objects.equals(this.intervalSet, that.intervalSet) && | ||
Objects.equals(this.parseLength, that.parseLength); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(type, intervalSet, parseLength); | ||
} | ||
} | ||
|
||
/** | ||
* Parses a single escape sequence starting at {@code startOff}. | ||
* | ||
* Returns {@link Result.INVALID} if no valid escape sequence was found, a Result otherwise. | ||
*/ | ||
public static Result parseEscape(String s, int startOff) { | ||
int offset = startOff; | ||
if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') { | ||
return Result.INVALID; | ||
} | ||
// Move past backslash | ||
offset++; | ||
int escaped = s.codePointAt(offset); | ||
// Move past escaped code point | ||
offset += Character.charCount(escaped); | ||
if (escaped == 'u') { | ||
// \\u{1} is the shortest we support | ||
if (offset + 3 > s.length()) { | ||
return Result.INVALID; | ||
} | ||
int hexStartOffset; | ||
int hexEndOffset; | ||
if (s.codePointAt(offset) == '{') { | ||
hexStartOffset = offset + 1; | ||
hexEndOffset = s.indexOf('}', hexStartOffset); | ||
if (hexEndOffset == -1) { | ||
return Result.INVALID; | ||
} | ||
offset = hexEndOffset + 1; | ||
} else { | ||
if (offset + 4 > s.length()) { | ||
return Result.INVALID; | ||
} | ||
hexStartOffset = offset; | ||
hexEndOffset = offset + 4; | ||
offset = hexEndOffset; | ||
} | ||
int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset); | ||
if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) { | ||
return Result.INVALID; | ||
} | ||
return new Result( | ||
Result.Type.INTERVAL_SET, | ||
IntervalSet.of(codePointValue), | ||
offset - startOff); | ||
} else if (escaped == 'p' || escaped == 'P') { | ||
// \p{L} is the shortest we support | ||
if (offset + 3 > s.length() || s.codePointAt(offset) != '{') { | ||
return Result.INVALID; | ||
} | ||
int openBraceOffset = offset; | ||
int closeBraceOffset = s.indexOf('}', openBraceOffset); | ||
if (closeBraceOffset == -1) { | ||
return Result.INVALID; | ||
} | ||
String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset); | ||
IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName); | ||
if (propertyIntervalSet == null) { | ||
return Result.INVALID; | ||
} | ||
offset = closeBraceOffset + 1; | ||
if (escaped == 'P') { | ||
propertyIntervalSet = propertyIntervalSet.complement(IntervalSet.COMPLETE_CHAR_SET); | ||
} | ||
return new Result( | ||
Result.Type.INTERVAL_SET, | ||
propertyIntervalSet, | ||
offset - startOff); | ||
} else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) { | ||
int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped]; | ||
if (codePoint == 0) { | ||
return Result.INVALID; | ||
} | ||
return new Result( | ||
Result.Type.INTERVAL_SET, | ||
IntervalSet.of(codePoint), | ||
offset - startOff); | ||
} else { | ||
return Result.INVALID; | ||
} | ||
} | ||
} |