Skip to content

Commit 20de961

Browse files
committed
Auto merge of #41824 - Mark-Simulacrum:undo-yacc-removal, r=nagisa
Readd LALR grammar. Reverts a portion of #41705. Please let me know if I missed anything. r? @nagisa
2 parents 198917b + 6ebbe0e commit 20de961

File tree

7 files changed

+2725
-0
lines changed

7 files changed

+2725
-0
lines changed

src/grammar/.gitignore

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
*.class
2+
*.java
3+
*.tokens

src/grammar/lexer.l

+343
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,343 @@
1+
%{
2+
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
3+
// file at the top-level directory of this distribution and at
4+
// http://rust-lang.org/COPYRIGHT.
5+
//
6+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9+
// option. This file may not be copied, modified, or distributed
10+
// except according to those terms.
11+
12+
#include <stdio.h>
13+
#include <ctype.h>
14+
15+
static int num_hashes; // number of hash characters counted in the opening delimiter of a raw literal
16+
static int end_hashes; // number of hash characters matched so far in a candidate closing delimiter
17+
static int saw_non_hash; // set to 1 by the rawbytestr state once a non-hash character has been scanned
18+
19+
%}
20+
21+
%option stack
22+
%option yylineno
23+
24+
%x str
25+
%x rawstr
26+
%x rawstr_esc_begin
27+
%x rawstr_esc_body
28+
%x rawstr_esc_end
29+
%x byte
30+
%x bytestr
31+
%x rawbytestr
32+
%x rawbytestr_nohash
33+
%x pound
34+
%x shebang_or_attr
35+
%x ltorchar
36+
%x linecomment
37+
%x doc_line
38+
%x blockcomment
39+
%x doc_block
40+
%x suffix
41+
42+
ident [a-zA-Z\x80-\xff_][a-zA-Z0-9\x80-\xff_]*
43+
44+
%%
45+
46+
<suffix>{ident} { /* an identifier directly after a literal is consumed as its suffix; no token is returned for it */ BEGIN(INITIAL); }
47+
<suffix>(.|\n) { /* no suffix present: push the character back and resume normal scanning */ yyless(0); BEGIN(INITIAL); }
48+
49+
[ \n\t\r] { /* whitespace produces no token */ }
50+
51+
\xef\xbb\xbf {
52+
// UTF-8 byte order mark (BOM), ignore if in line 1, error otherwise
53+
if (yyget_lineno() != 1) {
54+
return -1; // a BOM on any later line is reported as the error value -1
55+
}
56+
}
57+
58+
\/\/(\/|\!) { /* line doc comment opener: two slashes plus a third slash or a bang; keep it in yytext */ BEGIN(doc_line); yymore(); }
59+
<doc_line>\n { BEGIN(INITIAL); // the newline ends the doc comment
60+
yyleng--; // drop the trailing newline from the token text
61+
yytext[yyleng] = 0;
62+
return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT); // third character selects inner versus outer
63+
}
64+
<doc_line>[^\n]* { /* accumulate the rest of the line into the token */ yymore(); }
65+
66+
\/\/|\/\/\/\/ { /* plain line comment: two slashes, or four slashes which out-match the three-character doc opener by length */ BEGIN(linecomment); }
67+
<linecomment>\n { /* the newline ends the comment */ BEGIN(INITIAL); }
68+
<linecomment>[^\n]* { /* comment text is discarded */ }
69+
70+
\/\*(\*|\!)[^*] { /* block doc comment opener; a single push so that the matching pop returns to the state we came from. NOTE(review): a slash also satisfies the final character class, so the four-character empty comment enters this state too - confirm intended */ yy_push_state(doc_block); yymore(); }
71+
<doc_block>\/\* { /* nested opener: one more level on the start-condition stack */ yy_push_state(doc_block); yymore(); }
72+
<doc_block>\*\/ {
73+
yy_pop_state(); // leave one nesting level; this switches to the state that was pushed by the matching opener
74+
if (YY_START == doc_block) { // still inside an enclosing doc block, so this closer was a nested one
75+
yymore();
76+
} else { // outermost closer: emit the whole accumulated comment; third character selects inner versus outer
77+
return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
78+
}
79+
}
80+
<doc_block>(.|\n) { /* accumulate comment text into the token */ yymore(); }
81+
82+
\/\* { /* ordinary block comment: remember the current state and enter blockcomment */ yy_push_state(blockcomment); }
83+
<blockcomment>\/\* { /* nested opener: push another level */ yy_push_state(blockcomment); }
84+
<blockcomment>\*\/ { /* closer: return to the state saved by the matching opener */ yy_pop_state(); }
85+
<blockcomment>(.|\n) { /* comment text is discarded */ }
86+
87+
_ { /* keyword table: each reserved word returns its own token; listed before the identifier rule so that ties of equal length go to the keyword */ return UNDERSCORE; }
88+
as { return AS; }
89+
box { return BOX; }
90+
break { return BREAK; }
91+
const { return CONST; }
92+
continue { return CONTINUE; }
93+
crate { return CRATE; }
94+
else { return ELSE; }
95+
enum { return ENUM; }
96+
extern { return EXTERN; }
97+
false { return FALSE; }
98+
fn { return FN; }
99+
for { return FOR; }
100+
if { return IF; }
101+
impl { return IMPL; }
102+
in { return IN; }
103+
let { return LET; }
104+
loop { return LOOP; }
105+
match { return MATCH; }
106+
mod { return MOD; }
107+
move { return MOVE; }
108+
mut { return MUT; }
109+
priv { return PRIV; }
110+
proc { return PROC; }
111+
pub { return PUB; }
112+
ref { return REF; }
113+
return { return RETURN; }
114+
self { return SELF; }
115+
static { return STATIC; }
116+
struct { return STRUCT; }
117+
trait { return TRAIT; }
118+
true { return TRUE; }
119+
type { return TYPE; }
120+
typeof { return TYPEOF; }
121+
unsafe { return UNSAFE; }
122+
use { return USE; }
123+
where { return WHERE; }
124+
while { return WHILE; }
125+
126+
{ident} { /* any identifier that is not exactly one of the keywords above */ return IDENT; }
127+
128+
0x[0-9a-fA-F_]+ { /* hexadecimal integer; the suffix state then handles any trailing type suffix */ BEGIN(suffix); return LIT_INTEGER; }
129+
0o[0-7_]+ { /* octal integer: only the digits 0 to 7 are octal digits, so 8 is not accepted here */ BEGIN(suffix); return LIT_INTEGER; }
130+
0b[01_]+ { /* binary integer */ BEGIN(suffix); return LIT_INTEGER; }
131+
[0-9][0-9_]* { /* decimal integer */ BEGIN(suffix); return LIT_INTEGER; }
132+
[0-9][0-9_]*\.(\.|[a-zA-Z]) { /* integer followed by a second dot or by a letter: give back the dot and the character after it so they are scanned separately */ yyless(yyleng - 2); BEGIN(suffix); return LIT_INTEGER; }
133+
134+
[0-9][0-9_]*\.[0-9_]*([eE][-\+]?[0-9_]+)? { /* float with a decimal point and an optional exponent */ BEGIN(suffix); return LIT_FLOAT; }
135+
[0-9][0-9_]*(\.[0-9_]*)?[eE][-\+]?[0-9_]+ { /* float with a mandatory exponent and an optional fraction */ BEGIN(suffix); return LIT_FLOAT; }
136+
137+
; { /* single-character punctuation is returned as its own character code */ return ';'; }
138+
, { return ','; }
139+
\.\.\. { /* the longer dot sequences win over the single dot by match length */ return DOTDOTDOT; }
140+
\.\. { return DOTDOT; }
141+
\. { return '.'; }
142+
\( { return '('; }
143+
\) { return ')'; }
144+
\{ { return '{'; }
145+
\} { return '}'; }
146+
\[ { return '['; }
147+
\] { return ']'; }
148+
@ { return '@'; }
149+
# { /* a hash may start an attribute or a shebang line; keep it in yytext and look at the next character */ BEGIN(pound); yymore(); }
150+
<pound>\! { /* hash followed by a bang: decide between attribute and shebang line */ BEGIN(shebang_or_attr); yymore(); }
151+
<shebang_or_attr>\[ { // hash, bang, then an opening bracket
152+
BEGIN(INITIAL);
153+
yyless(2); // keep only the first two characters of the accumulated text; the bracket is rescanned
154+
return SHEBANG;
155+
}
156+
<shebang_or_attr>[^\[\n]*\n {
157+
// Since the \n was eaten as part of the token, yylineno will have
158+
// been incremented to the value 2 if the shebang was on the first
159+
// line. This yyless undoes that, setting yylineno back to 1.
160+
yyless(yyleng - 1);
161+
if (yyget_lineno() == 1) { // on the first line the whole line is returned as one token
162+
BEGIN(INITIAL);
163+
return SHEBANG_LINE;
164+
} else { // on later lines only the two leading characters form the token
165+
BEGIN(INITIAL);
166+
yyless(2);
167+
return SHEBANG;
168+
}
169+
}
170+
<pound>. { /* hash not followed by a bang: give back the extra character and return the hash alone */ BEGIN(INITIAL); yyless(1); return '#'; }
171+
172+
\~ { /* remaining punctuation and the comparison and shift operators; multi-character operators win over their prefixes by match length */ return '~'; }
173+
:: { return MOD_SEP; }
174+
: { return ':'; }
175+
\$ { return '$'; }
176+
\? { return '?'; }
177+
178+
== { return EQEQ; }
179+
=> { return FAT_ARROW; }
180+
= { return '='; }
181+
\!= { return NE; }
182+
\! { return '!'; }
183+
\<= { return LE; }
184+
\<\< { return SHL; }
185+
\<\<= { return SHLEQ; }
186+
\< { return '<'; }
187+
\>= { return GE; }
188+
\>\> { return SHR; }
189+
\>\>= { return SHREQ; }
190+
\> { return '>'; }
191+
192+
\x27 { /* a single quote starts either a lifetime or a character literal; the ltorchar state decides which */ BEGIN(ltorchar); yymore(); }
193+
<ltorchar>static { /* the static lifetime has its own token */ BEGIN(INITIAL); return STATIC_LIFETIME; }
194+
<ltorchar>{ident} { /* any other identifier after the quote is a lifetime */ BEGIN(INITIAL); return LIFETIME; }
195+
<ltorchar>\\[nrt\\\x27\x220]\x27 { /* simple backslash escape followed by the closing quote */ BEGIN(suffix); return LIT_CHAR; }
196+
<ltorchar>\\x[0-9a-fA-F]{2}\x27 { /* two-digit hex escape followed by the closing quote */ BEGIN(suffix); return LIT_CHAR; }
197+
<ltorchar>\\u\{[0-9a-fA-F]?{6}\}\x27 { /* braced unicode escape holding up to six hex digits */ BEGIN(suffix); return LIT_CHAR; }
198+
<ltorchar>.\x27 { /* any single non-newline character followed by the closing quote */ BEGIN(suffix); return LIT_CHAR; }
199+
<ltorchar>[\x80-\xff]{2,4}\x27 { /* two to four high bytes, presumably one multi-byte UTF-8 character, then the closing quote */ BEGIN(suffix); return LIT_CHAR; }
200+
<ltorchar><<EOF>> { /* input ended inside the literal: reset the state and report -1 */ BEGIN(INITIAL); return -1; }
201+
202+
b\x22 { /* the letter b followed by a double quote opens a byte string */ BEGIN(bytestr); yymore(); }
203+
<bytestr>\x22 { /* closing double quote: the accumulated text is the token */ BEGIN(suffix); return LIT_BYTE_STR; }
204+
205+
<bytestr><<EOF>> { /* unterminated byte string; note the start condition is left unchanged here */ return -1; }
206+
<bytestr>\\[n\nrt\\\x27\x220] { /* recognised simple escapes, including a backslash before a newline */ yymore(); }
207+
<bytestr>\\x[0-9a-fA-F]{2} { /* two-digit hex escape */ yymore(); }
208+
<bytestr>\\u\{[0-9a-fA-F]?{6}\} { /* braced unicode escape holding up to six hex digits */ yymore(); }
209+
<bytestr>\\[^n\nrt\\\x27\x220] { /* backslash before any other character is reported as -1; complete hex and unicode escapes match longer rules above */ return -1; }
210+
<bytestr>(.|\n) { /* ordinary content, newlines included */ yymore(); }
211+
212+
br\x22 { /* raw byte string with no hash delimiters */ BEGIN(rawbytestr_nohash); yymore(); }
213+
<rawbytestr_nohash>\x22 { /* the next double quote ends it; no escapes are interpreted in this state */ BEGIN(suffix); return LIT_BYTE_STR_RAW; }
214+
<rawbytestr_nohash>(.|\n) { /* accumulate content */ yymore(); }
215+
<rawbytestr_nohash><<EOF>> { /* unterminated literal */ return -1; }
216+
217+
br/# { // the letters br, matched only when the next character is a hash; the hash itself is left for the rules below
218+
BEGIN(rawbytestr);
219+
yymore();
220+
num_hashes = 0; // reset all three counters for this literal
221+
saw_non_hash = 0;
222+
end_hashes = 0;
223+
}
224+
<rawbytestr># {
225+
if (!saw_non_hash) { // still in the opening run of hashes
226+
num_hashes++;
227+
} else if (end_hashes != 0) { // continuing a closing run that a quote-hash pair started
228+
end_hashes++;
229+
if (end_hashes == num_hashes) { // closing run is as long as the opening run
230+
BEGIN(INITIAL); // NOTE(review): the no-hash variant enters the suffix state instead - confirm the difference is intended
231+
return LIT_BYTE_STR_RAW;
232+
}
233+
}
234+
yymore(); // otherwise the hash is just accumulated
235+
}
236+
<rawbytestr>\x22# { // a double quote immediately followed by a hash
237+
// NOTE(review): saw_non_hash is not checked here, so this also fires on the opening quote when the body begins with a hash - confirm intended
end_hashes = 1; // start counting a candidate closing delimiter
238+
if (end_hashes == num_hashes) {
239+
BEGIN(INITIAL);
240+
return LIT_BYTE_STR_RAW;
241+
}
242+
yymore();
243+
}
244+
<rawbytestr>(.|\n) {
245+
if (!saw_non_hash) { // the first non-hash character ends the opening run
246+
saw_non_hash = 1;
247+
}
248+
if (end_hashes != 0) { // any other character cancels a partial closing run
249+
end_hashes = 0;
250+
}
251+
yymore();
252+
}
253+
<rawbytestr><<EOF>> { /* unterminated literal */ return -1; }
254+
255+
b\x27 { /* the letter b followed by a single quote opens a byte literal */ BEGIN(byte); yymore(); }
256+
<byte>\\[nrt\\\x27\x220]\x27 { /* simple backslash escape and closing quote; byte literals return to INITIAL rather than the suffix state */ BEGIN(INITIAL); return LIT_BYTE; }
257+
<byte>\\x[0-9a-fA-F]{2}\x27 { /* two-digit hex escape */ BEGIN(INITIAL); return LIT_BYTE; }
258+
<byte>\\u[0-9a-fA-F]{4}\x27 { /* lowercase u followed by exactly four hex digits, without braces */ BEGIN(INITIAL); return LIT_BYTE; }
259+
<byte>\\U[0-9a-fA-F]{8}\x27 { /* uppercase U followed by exactly eight hex digits */ BEGIN(INITIAL); return LIT_BYTE; }
260+
<byte>.\x27 { /* any single non-newline character and the closing quote */ BEGIN(INITIAL); return LIT_BYTE; }
261+
<byte><<EOF>> { /* input ended inside the literal: reset the state and report -1 */ BEGIN(INITIAL); return -1; }
262+
263+
r\x22 { /* raw string with no hash delimiters */ BEGIN(rawstr); yymore(); }
264+
<rawstr>\x22 { /* the next double quote ends it; no escapes are interpreted in this state */ BEGIN(suffix); return LIT_STR_RAW; }
265+
<rawstr>(.|\n) { /* accumulate content */ yymore(); }
266+
<rawstr><<EOF>> { /* unterminated literal */ return -1; }
267+
268+
r/# { // the letter r, matched only when the next character is a hash; the hash itself is left for the rules below
269+
BEGIN(rawstr_esc_begin);
270+
yymore();
271+
num_hashes = 0; // reset the counters for this literal
272+
saw_non_hash = 0; // reset as well, although none of the rawstr_esc rules read it
273+
end_hashes = 0;
274+
}
275+
276+
<rawstr_esc_begin># {
277+
num_hashes++; // count the opening run of hashes
278+
yymore();
279+
}
280+
<rawstr_esc_begin>\x22 {
281+
BEGIN(rawstr_esc_body); // the opening quote ends the hash run and starts the body
282+
yymore();
283+
}
284+
<rawstr_esc_begin>(.|\n) { /* anything other than a hash or the opening quote is an error */ return -1; }
285+
286+
<rawstr_esc_body>\x22/# {
287+
BEGIN(rawstr_esc_end); // a double quote followed by a hash may begin the closing delimiter; only the quote is consumed
288+
yymore();
289+
}
290+
<rawstr_esc_body>(.|\n) {
291+
yymore(); // ordinary body content
292+
}
293+
294+
<rawstr_esc_end># {
295+
end_hashes++;
296+
if (end_hashes == num_hashes) { // closing run is as long as the opening run
297+
BEGIN(INITIAL); // NOTE(review): the no-hash raw string enters the suffix state instead - confirm the difference is intended
298+
return LIT_STR_RAW;
299+
}
300+
yymore();
301+
}
302+
<rawstr_esc_end>[^#] {
303+
end_hashes = 0; // the closing run was too short, so it was body text after all
304+
BEGIN(rawstr_esc_body); // NOTE(review): the character consumed here may itself be a double quote, which is then not considered as the start of a new closing delimiter - confirm intended
305+
yymore();
306+
}
307+
308+
<rawstr_esc_begin,rawstr_esc_body,rawstr_esc_end><<EOF>> { /* unterminated literal in any of the three states */ return -1; }
309+
310+
\x22 { /* a double quote opens an ordinary string literal */ BEGIN(str); yymore(); }
311+
<str>\x22 { /* closing double quote: the accumulated text is the token */ BEGIN(suffix); return LIT_STR; }
312+
313+
<str><<EOF>> { /* unterminated string; note the start condition is left unchanged here */ return -1; }
314+
<str>\\[n\nr\rt\\\x27\x220] { /* recognised simple escapes, including a backslash before a newline or a carriage return */ yymore(); }
315+
<str>\\x[0-9a-fA-F]{2} { /* two-digit hex escape */ yymore(); }
316+
<str>\\u\{[0-9a-fA-F]?{6}\} { /* braced unicode escape holding up to six hex digits */ yymore(); }
317+
<str>\\[^n\nrt\\\x27\x220] { /* backslash before any other character is reported as -1; a backslash before a carriage return also fits this class but the earlier rule of equal length wins */ return -1; }
318+
<str>(.|\n) { /* ordinary content, newlines included */ yymore(); }
319+
320+
\<- { /* arrows, arithmetic and logical operators; compound forms win over their one-character prefixes by match length */ return LARROW; }
321+
-\> { return RARROW; }
322+
- { return '-'; }
323+
-= { return MINUSEQ; }
324+
&& { return ANDAND; }
325+
& { return '&'; }
326+
&= { return ANDEQ; }
327+
\|\| { return OROR; }
328+
\| { return '|'; }
329+
\|= { return OREQ; }
330+
\+ { return '+'; }
331+
\+= { return PLUSEQ; }
332+
\* { return '*'; }
333+
\*= { return STAREQ; }
334+
\/ { return '/'; }
335+
\/= { return SLASHEQ; }
336+
\^ { return '^'; }
337+
\^= { return CARETEQ; }
338+
% { return '%'; }
339+
%= { return PERCENTEQ; }
340+
341+
<<EOF>> { /* end of input in any start condition that has no EOF rule of its own */ return 0; }
342+
343+
%%

0 commit comments

Comments
 (0)