-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLexer.ts
79 lines (58 loc) · 1.81 KB
/
Lexer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import { CompiledTokenGrammar } from "./compileTokenGrammar";
import Token from "./Token";
import TokenStream from "./TokenStream";
/**
 * Picks the grammar rule with the longest (non-empty) match.
 *
 * @param matches map from token name to the match result for that rule
 *   (or null when the rule did not match).
 * @returns the winning token name and its match length, or undefined when
 *   no rule produced a non-empty match. On equal lengths the rule that
 *   appears first in `matches` wins.
 */
function maximumMatch(matches: Record<string, RegExpMatchArray | null>) {
  let best: { matchToken: string; matchLength: number } | undefined;
  for (const [matchToken, match] of Object.entries(matches)) {
    if (match === null) {
      continue;
    }
    const matchLength = match[0].length;
    // Strictly-greater comparison: zero-length matches never win, and the
    // first rule to reach the maximum length keeps it.
    if (matchLength > (best?.matchLength ?? 0)) {
      best = { matchToken, matchLength };
    }
  }
  return best;
}
const defaultTokenizeOpts = { trim: true };
class Lexer {
  /**
   * @param tokenGrammar compiled grammar: iterable of [tokenName, matcher]
   *   pairs. Each matcher is expected to be anchored to the start of the
   *   remaining input — TODO(review): confirm compileTokenGrammar anchors
   *   its patterns.
   */
  public constructor(private tokenGrammar: CompiledTokenGrammar) {}

  /**
   * Splits `program` into a TokenStream by repeatedly taking the longest
   * match among all grammar rules at the current position (on ties, the
   * rule listed first in the grammar wins).
   *
   * @param program source text to tokenize
   * @param opts trim — when true (default), whitespace between tokens is
   *   skipped instead of being fed to the grammar
   * @returns stream of tokens, each carrying its offset in `program`
   * @throws Error when no rule matches the remaining input
   */
  public tokenize(
    program: string,
    opts: { trim?: boolean } = defaultTokenizeOpts
  ): TokenStream {
    const tokenStream = new TokenStream();
    let programBuffer = program;
    let currentOffset = 0;
    // Skip leading whitespace while keeping currentOffset in sync with the
    // ORIGINAL program text. The previous implementation used
    // `replace("\n", "").trim()`, which (a) removed only the FIRST newline
    // (string patterns replace a single occurrence), (b) could fuse tokens
    // across lines ("a\nb" -> "ab"), and (c) never advanced currentOffset,
    // so reported token/error offsets drifted from the input.
    const skipLeadingWhitespace = () => {
      const trimmed = programBuffer.replace(/^\s+/, "");
      currentOffset += programBuffer.length - trimmed.length;
      programBuffer = trimmed;
    };
    if (opts.trim) {
      skipLeadingWhitespace();
    }
    while (programBuffer) {
      // Run every grammar rule against the remaining input.
      const matches: Record<string, RegExpMatchArray | null> = {};
      for (const [tokenName, matcher] of this.tokenGrammar) {
        matches[tokenName] = programBuffer.match(matcher);
      }
      const match = maximumMatch(matches);
      if (!match) {
        throw new Error(
          `unexpected token ${programBuffer[0]} at offset ${currentOffset}`
        );
      }
      tokenStream.pushToken(
        new Token(
          match.matchToken,
          programBuffer.substring(0, match.matchLength),
          currentOffset
        )
      );
      programBuffer = programBuffer.substring(match.matchLength);
      currentOffset += match.matchLength;
      if (opts.trim) {
        skipLeadingWhitespace();
      }
    }
    return tokenStream;
  }
}
export default Lexer;