-
Notifications
You must be signed in to change notification settings - Fork 0
/
LyingTokenSource.java
161 lines (141 loc) · 5.77 KB
/
LyingTokenSource.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package polyfauna.intellihask.parser;
import com.intellij.lang.PsiBuilder;
import polyfauna.intellihask.antlr_generated.HaskellLexer;
import polyfauna.intellihask.psi.Tokens;
import org.antlr.intellij.adaptor.lexer.PSITokenSource;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.Pair;
import java.util.ArrayDeque;
import java.util.Deque;
// inserts SEMI, VOCURLY, and VCCURLY tokens for parsing, to be removed later
public class LyingTokenSource extends PSITokenSource{
protected static class Block{
int indent;
int parens;
boolean isLetBlock;
public Block(int indent, boolean isLetBlock){
this.indent = indent;
this.isLetBlock = isLetBlock;
}
}
protected Deque<Token> preempted = new ArrayDeque<>();
protected Deque<Block> blocks = new ArrayDeque<>();
protected boolean wasBlockKw = false, wasLetKw = false;
// we don't *actually* have access to line numbers or indexes, but we see newlines, so like same difference
protected boolean seenNewline = false;
protected int lastLineOffset = 0;
public LyingTokenSource(PsiBuilder builder){
super(builder);
// we also don't *actually* see newlines; instead we reconstruct them from the skipping callback
builder.setWhitespaceSkippedCallback((type, start, end) -> {
if(type == Tokens.getFor(HaskellLexer.NEWLINE)){
seenNewline = true;
lastLineOffset = start;
}
});
}
// on getting a new token,
// if it's the first token of this line,
// if the previous token was a keyword, this indent defines the start of a block
// -> produce a VOCURLY, push a new indent to the stack, and schedule *this* token next
// -> TODO: if the indentation is smaller than the previous block indent, error; could just not create VOCURLY, or handle later?
// if this has the same or greater indent than the current block, then this is a statement
// -> produce a SEMI, schedule this token next
// if this has less indent than the current block, this ends the block
// -> produce a VCCURLY, pop the indent from the stack, and schedule this token next
// -> apply this repeatedly for each closed block
// if it's a newline, consider its indent to be 0 always
// if there's no current block, consider the current block indent to be 0
public Token nextToken(){
if(!preempted.isEmpty())
return preempted.pop();
Token next = super.nextToken(); // possibly triggers the whitespace callback
preempted.push(next);
int indent = next.getStartIndex() - lastLineOffset - 1;
// start a new block after any block kw not followed by {, even if there's no newline
boolean noExtraSemi = false;
if(wasBlockKw && next.getType() != HaskellLexer.OCURLY){
pushBlock(indent);
preempted.push(createTok(HaskellLexer.VOCURLY, "VOCURLY", next.getStopIndex()));
// don't start every block with an unnecessary semi
noExtraSemi = true;
}
// adjust parens for this block
if(next.getType() == HaskellLexer.OpenRoundBracket)
afterParen(1);
if(next.getType() == HaskellLexer.CloseRoundBracket)
afterParen(-1);
// if the current block has dangling parenthesis, we know we should finish early
// e.g. `(case h of \{ [] -> [] \; \})`
if(isDanglingParen()){
popBlock();
preempted.push(createTok(HaskellLexer.VCCURLY, "VCCURLY-P", next.getStopIndex()));
preempted.push(createTok(HaskellLexer.SEMI, "SEMI-P", next.getStopIndex()));
// if the current block was implicitly triggered by a `let` keyword, the `in` keyword can also end it early
// e.g. `let \{ p = 1 \; \} in p`
// but we don't want normal semis to be inserted in the multiline case,
// i.e. `let \n \{ p = 1 \n \; \} \; in p` would be unwanted
}else if(isLetBlock() && next.getType() == HaskellLexer.IN){
popBlock();
preempted.push(createTok(HaskellLexer.VCCURLY, "VCCURLY-L", next.getStopIndex()));
preempted.push(createTok(HaskellLexer.SEMI, "SEMI-L", next.getStopIndex()));
noExtraSemi = true;
}
if(next.getType() == HaskellLexer.EOF){
// all remaining VCCURLYs, plus two SEMIs for the road
preempted.push(createTok(HaskellLexer.SEMI, "SEMI-EOF", next.getStopIndex()));
while(!blocks.isEmpty()){
popBlock();
preempted.push(createTok(HaskellLexer.VCCURLY, "VCCURLY-EOF", next.getStopIndex()));
}
preempted.push(createTok(HaskellLexer.SEMI, "SEMI-EOF", next.getStopIndex()));
}else if(seenNewline){
seenNewline = false;
if(indent == curBlockIndent() && !noExtraSemi)
preempted.push(createTok(HaskellLexer.SEMI, "SEMI", next.getStopIndex()));
while(indent < curBlockIndent()){
popBlock();
preempted.push(createTok(HaskellLexer.SEMI, "SEMI", next.getStopIndex()));
preempted.push(createTok(HaskellLexer.VCCURLY, "VCCURLY", next.getStopIndex()));
preempted.push(createTok(HaskellLexer.SEMI, "SEMI", next.getStopIndex()));
}
}
wasBlockKw = switch(next.getType()){
case HaskellLexer.LET, HaskellLexer.WHERE, HaskellLexer.DO, HaskellLexer.OF -> true;
default -> false;
};
wasLetKw = next.getType() == HaskellLexer.LET;
return preempted.pop();
}
protected int curBlockIndent(){
if(!blocks.isEmpty())
return blocks.peek().indent;
return 0;
}
protected void pushBlock(int indent){
blocks.push(new Block(indent, wasLetKw));
}
protected void popBlock(){
blocks.pop();
}
protected void afterParen(int adj){
if(!blocks.isEmpty())
blocks.peek().parens += adj;
}
protected boolean isDanglingParen(){
return !blocks.isEmpty() && blocks.peek().parens < 0;
}
protected boolean isLetBlock(){
return !blocks.isEmpty() && blocks.peek().isLetBlock;
}
@SuppressWarnings("unchecked")
protected Token createTok(int type, String name, int nstop){
return /* VCCURLY */ tokenFactory.create(
new Pair<>(this, ""),
type,
"<" + name + ">",
HaskellLexer.DEFAULT_TOKEN_CHANNEL,
nstop, nstop,
0, 0);
}
}