Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New \p{Letter} Unicode property escape #1688

Merged
merged 1 commit into from
Mar 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,129 @@ public void testA() throws Exception {
"s4->RuleStop_A_2\n";
checkTokensRule(g, null, expecting);
}
/**
 * Lexer charset that lists individual characters: {@code [abc]}.
 * The expected text contains a single set transition labelled 97..99
 * from s3 to s4.
 */
@Test
public void testCharSet() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [abc] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99}->s4\n" +
        "s4->RuleStop_A_2\n";

    // The middle argument is passed as null, as in every sibling charset test.
    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset written as a range: {@code [a-c]}.
 * The expected text is the same as for the enumerated form {@code [abc]}:
 * one set transition labelled 97..99 from s3 to s4.
 */
@Test
public void testCharSetRange() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [a-c] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset holding one four-hex-digit Unicode escape for U+ABCD.
 * The expected text has a single-atom transition labelled 43981
 * (0xABCD in decimal) from s3 to s4.
 */
@Test
public void testCharSetUnicodeBMPEscape() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\uABCD] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-43981->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset mixing a plain range ({@code a-c}) with a range whose
 * endpoints are four-hex-digit Unicode escapes (U+ABCD to U+ABFF).
 * The expected set label is 97..99 plus 43981..44031.
 */
@Test
public void testCharSetUnicodeBMPEscapeRange() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [a-c\\uABCD-\\uABFF] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99, 43981..44031}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset holding one brace-delimited Unicode escape for a code
 * point above the BMP, U+10ABCD.
 * The expected text has a single-atom transition labelled 1092557
 * (0x10ABCD in decimal) from s3 to s4.
 */
@Test
public void testCharSetUnicodeSMPEscape() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\u{10ABCD}] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-1092557->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset mixing a plain range ({@code a-c}) with a range whose
 * endpoints are brace-delimited Unicode escapes (U+10ABCD to U+10ABFF).
 * The expected set label is 97..99 plus 1092557..1092607.
 */
@Test
public void testCharSetUnicodeSMPEscapeRange() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [a-c\\u{10ABCD}-\\u{10ABFF}] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99, 1092557..1092607}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset built from a single Unicode property escape,
 * {@code [\p{Gothic}]}. The expected set label is 66352..66378.
 */
@Test
public void testCharSetUnicodePropertyEscape() throws Exception {
    // Gothic was picked because the script is long dead: its code point
    // assignments are unlikely to change, and a change would break the
    // hard-coded range expected below.
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\p{Gothic}] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{66352..66378}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset built from an inverted Unicode property escape,
 * {@code [\P{Gothic}]}. The expected set label is everything from 0 to
 * 1114111 except 66352..66378, i.e. 0..66351 plus 66379..1114111.
 */
@Test
public void testCharSetUnicodePropertyInvertEscape() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\P{Gothic}] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{0..66351, 66379..1114111}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset combining two Unicode property escapes,
 * {@code [\p{Gothic}\p{Mahajani}]}. The expected set label lists both
 * ranges: 66352..66378 and 69968..70006.
 */
@Test
public void testCharSetUnicodeMultiplePropertyEscape() throws Exception {
    // Both scripts are hoped to be stable; the expected ranges below are
    // hard-coded, so a change to either script would fail this test.
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\p{Gothic}\\p{Mahajani}] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{66352..66378, 69968..70006}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
/**
 * Lexer charset combining two property escapes that overlap,
 * {@code [\p{ASCII_Hex_Digit}\p{Hex_Digit}]}. The expected set label
 * lists each range only once: the three ASCII ranges (48..57, 65..70,
 * 97..102) followed by three further ranges starting at 65296.
 */
@Test
public void testCharSetUnicodePropertyOverlap() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\p{ASCII_Hex_Digit}\\p{Hex_Digit}] ;";
    final LexerGrammar grammar = new LexerGrammar(grammarText);

    final String expectedAtn =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{48..57, 65..70, 97..102, 65296..65305, 65313..65318, 65345..65350}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(grammar, null, expectedAtn);
}
@Test public void testRangeOrRange() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar P;\n"+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,44 @@ public void testSetUp() throws Exception {
super.testErrors(pair, true);
}

/**
 * Feeds a lexer grammar whose twelve rules each contain a malformed or
 * disallowed escape inside a charset, and expects one error per rule.
 *
 * Rules on grammar lines 2-9 (empty, unterminated, out-of-range or unknown
 * escapes) expect {@link ErrorType#INVALID_ESCAPE_SEQUENCE}; rules on lines
 * 10-13 (a property escape used as a range endpoint) expect
 * {@link ErrorType#UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE}. Each reported
 * column is the position of the rule's opening bracket.
 */
@Test
public void testInvalidUnicodeEscapesInCharSet() {
    final String grammarText =
        "lexer grammar Test;\n" +
        "INVALID_EXTENDED_UNICODE_EMPTY: [\\u{}];\n" +
        "INVALID_EXTENDED_UNICODE_NOT_TERMINATED: [\\u{];\n" +
        "INVALID_EXTENDED_UNICODE_TOO_LONG: [\\u{110000}];\n" +
        "INVALID_UNICODE_PROPERTY_EMPTY: [\\p{}];\n" +
        "INVALID_UNICODE_PROPERTY_NOT_TERMINATED: [\\p{];\n" +
        "INVALID_INVERTED_UNICODE_PROPERTY_EMPTY: [\\P{}];\n" +
        "INVALID_UNICODE_PROPERTY_UNKNOWN: [\\p{NotAProperty}];\n" +
        "INVALID_INVERTED_UNICODE_PROPERTY_UNKNOWN: [\\P{NotAProperty}];\n" +
        "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}];\n" +
        "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_2: [\\p{Letter}-Z];\n" +
        "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_3: [A-\\p{Number}];\n" +
        "INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";

    // Shared leading part of each expected message: "error(" followed by the error code.
    final String invalidEscape = "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code;
    final String propertyInRange = "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code;

    final String expectedErrors =
        invalidEscape + "): Test.g4:2:32: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:3:41: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:4:35: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:5:32: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:6:41: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:7:41: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:8:34: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:9:43: invalid escape sequence\n" +
        propertyInRange + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
        propertyInRange + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
        propertyInRange + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
        propertyInRange + "): Test.g4:13:48: unicode property escapes not allowed in lexer charset range: [\\P{Uppercase_Letter}-\\P{Number}]\n";

    // testErrors takes the grammar and the expected output as a two-element array.
    super.testErrors(new String[] { grammarText, expectedErrors }, true);
}

/**
* This test ensures the {@link ErrorType#UNRECOGNIZED_ASSOC_OPTION} warning
* is produced as described in the documentation.
Expand Down
Loading