Skip to content

Commit

Permalink
Merge pull request #1672 from bhamiltoncx/code-point-transitions
Browse files Browse the repository at this point in the history
New class CodePointTransitions to create SetTransitions for Unicode code points > U+FFFF
  • Loading branch information
parrt authored Feb 20, 2017
2 parents a6117de + 291638c commit 91df265
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/

package org.antlr.v4.runtime.atn;

import org.antlr.v4.runtime.misc.IntervalSet;

/**
 * Static factory helpers that pick between {@link AtomTransition},
 * {@link RangeTransition}, and {@link SetTransition} according to the
 * code points being matched.
 *
 * <p>To keep the serialized ATN size small, atom and range transitions are
 * only inlined for Unicode code points {@code <= U+FFFF}.
 *
 * <p>As soon as a Unicode code point {@code > U+FFFF} is involved, the match
 * is represented as a set transition, even when it is logically a single
 * atom or a single range.
 */
public abstract class CodePointTransitions {
	/**
	 * Builds the transition that matches exactly one code point.
	 *
	 * @param target the state the new transition leads to
	 * @param codePoint the code point to match
	 * @return a new {@link AtomTransition} when
	 *         {@link Character#isSupplementaryCodePoint(int)} is {@code false}
	 *         for {@code codePoint}; otherwise a new {@link SetTransition}
	 *         over a set containing only {@code codePoint}
	 */
	public static Transition createWithCodePoint(ATNState target, int codePoint) {
		// Guard clause: the non-supplementary case keeps the compact atom form.
		if (!Character.isSupplementaryCodePoint(codePoint)) {
			return new AtomTransition(target, codePoint);
		}
		return new SetTransition(target, IntervalSet.of(codePoint));
	}

	/**
	 * Builds the transition that matches the range
	 * {@code codePointFrom..codePointTo}.
	 *
	 * @param target the state the new transition leads to
	 * @param codePointFrom the first endpoint of the range
	 * @param codePointTo the second endpoint of the range
	 * @return a new {@link RangeTransition} when
	 *         {@link Character#isSupplementaryCodePoint(int)} is {@code false}
	 *         for both endpoints; otherwise a new {@link SetTransition} over
	 *         {@code IntervalSet.of(codePointFrom, codePointTo)}
	 */
	public static Transition createWithCodePointRange(
			ATNState target,
			int codePointFrom,
			int codePointTo) {
		// A single supplementary endpoint is enough to force the set form.
		boolean neitherEndpointIsSupplementary =
				!Character.isSupplementaryCodePoint(codePointFrom)
				&& !Character.isSupplementaryCodePoint(codePointTo);
		if (neitherEndpointIsSupplementary) {
			return new RangeTransition(target, codePointFrom, codePointTo);
		}
		return new SetTransition(target, IntervalSet.of(codePointFrom, codePointTo));
	}
}
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/automata/ATNOptimizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.AtomTransition;
import org.antlr.v4.runtime.atn.BlockEndState;
import org.antlr.v4.runtime.atn.CodePointTransitions;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.EpsilonTransition;
import org.antlr.v4.runtime.atn.NotSetTransition;
Expand Down Expand Up @@ -116,11 +117,11 @@ private static void optimizeSets(Grammar g, ATN atn) {
Transition newTransition;
if (matchSet.getIntervals().size() == 1) {
if (matchSet.size() == 1) {
newTransition = new AtomTransition(blockEndState, matchSet.getMinElement());
newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement());
}
else {
Interval matchInterval = matchSet.getIntervals().get(0);
newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b);
newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b);
}
}
else {
Expand Down
7 changes: 4 additions & 3 deletions tool/src/org/antlr/v4/automata/LexerATNFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.ActionTransition;
import org.antlr.v4.runtime.atn.AtomTransition;
import org.antlr.v4.runtime.atn.CodePointTransitions;
import org.antlr.v4.runtime.atn.LexerAction;
import org.antlr.v4.runtime.atn.LexerChannelAction;
import org.antlr.v4.runtime.atn.LexerCustomAction;
Expand Down Expand Up @@ -255,7 +256,7 @@ public Handle range(GrammarAST a, GrammarAST b) {
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
checkRange(a, b, t1, t2);
left.addTransition(new RangeTransition(right, t1, t2));
left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2));
a.atnState = left;
b.atnState = left;
return new Handle(left, right);
Expand Down Expand Up @@ -301,7 +302,7 @@ else if ( t.getType()==ANTLRParser.TOKEN_REF ) {
Transition transition;
if (set.getIntervals().size() == 1) {
Interval interval = set.getIntervals().get(0);
transition = new RangeTransition(right, interval.a, interval.b);
transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b);
} else {
transition = new SetTransition(right, set);
}
Expand Down Expand Up @@ -356,7 +357,7 @@ public Handle stringLiteral(TerminalAST stringLiteralAST) {
for (int i = 0; i < n; ) {
right = newState(stringLiteralAST);
int codePoint = chars.codePointAt(i);
prev.addTransition(new AtomTransition(right, codePoint));
prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint));
prev = right;
i += Character.charCount(codePoint);
}
Expand Down

0 comments on commit 91df265

Please sign in to comment.