@@ -29,29 +29,19 @@ struct LexError {
29
29
30
30
impl < ' a > LexedStr < ' a > {
31
31
pub fn new ( text : & ' a str ) -> LexedStr < ' a > {
32
- let mut res = LexedStr { text, kind : Vec :: new ( ) , start : Vec :: new ( ) , error : Vec :: new ( ) } ;
33
-
34
- let mut offset = 0 ;
32
+ let mut conv = Converter :: new ( text) ;
35
33
if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
36
- res. push ( SHEBANG , offset) ;
37
- offset = shebang_len
34
+ conv . res . push ( SHEBANG , conv . offset ) ;
35
+ conv . offset = shebang_len;
38
36
} ;
39
- for token in rustc_lexer:: tokenize ( & text[ offset..] ) {
40
- let token_text = & text[ offset..] [ ..token. len ] ;
41
37
42
- let ( kind, err) = from_rustc ( & token. kind , token_text) ;
43
- res. push ( kind, offset) ;
44
- offset += token. len ;
38
+ for token in rustc_lexer:: tokenize ( & text[ conv. offset ..] ) {
39
+ let token_text = & text[ conv. offset ..] [ ..token. len ] ;
45
40
46
- if let Some ( err) = err {
47
- let token = res. len ( ) as u32 ;
48
- let msg = err. to_string ( ) ;
49
- res. error . push ( LexError { msg, token } ) ;
50
- }
41
+ conv. extend_token ( & token. kind , token_text) ;
51
42
}
52
- res. push ( EOF , offset) ;
53
43
54
- res
44
+ conv . finalize_with_eof ( )
55
45
}
56
46
57
47
pub fn single_token ( text : & ' a str ) -> Option < ( SyntaxKind , Option < String > ) > {
@@ -64,8 +54,12 @@ impl<'a> LexedStr<'a> {
64
54
return None ;
65
55
}
66
56
67
- let ( kind, err) = from_rustc ( & token. kind , text) ;
68
- Some ( ( kind, err. map ( |it| it. to_owned ( ) ) ) )
57
+ let mut conv = Converter :: new ( text) ;
58
+ conv. extend_token ( & token. kind , text) ;
59
+ match & * conv. res . kind {
60
+ [ kind] => Some ( ( * kind, conv. res . error . pop ( ) . map ( |it| it. msg . clone ( ) ) ) ) ,
61
+ _ => None ,
62
+ }
69
63
}
70
64
71
65
pub fn as_str ( & self ) -> & str {
@@ -128,148 +122,179 @@ impl<'a> LexedStr<'a> {
128
122
}
129
123
}
130
124
131
- /// Returns `SyntaxKind` and an optional tokenize error message.
132
- fn from_rustc (
133
- kind : & rustc_lexer:: TokenKind ,
134
- token_text : & str ,
135
- ) -> ( SyntaxKind , Option < & ' static str > ) {
136
- // A note on an intended tradeoff:
137
- // We drop some useful information here (see patterns with double dots `..`)
138
- // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
139
- // being `u16` that come from `rowan::SyntaxKind`.
140
- let mut err = "" ;
141
-
142
- let syntax_kind = {
143
- match kind {
144
- rustc_lexer:: TokenKind :: LineComment { doc_style : _ } => COMMENT ,
145
- rustc_lexer:: TokenKind :: BlockComment { doc_style : _, terminated } => {
146
- if !terminated {
147
- err = "Missing trailing `*/` symbols to terminate the block comment" ;
125
+ struct Converter < ' a > {
126
+ res : LexedStr < ' a > ,
127
+ offset : usize ,
128
+ }
129
+
130
+ impl < ' a > Converter < ' a > {
131
+ fn new ( text : & ' a str ) -> Self {
132
+ Self {
133
+ res : LexedStr { text, kind : Vec :: new ( ) , start : Vec :: new ( ) , error : Vec :: new ( ) } ,
134
+ offset : 0 ,
135
+ }
136
+ }
137
+
138
+ fn finalize_with_eof ( mut self ) -> LexedStr < ' a > {
139
+ self . res . push ( EOF , self . offset ) ;
140
+ self . res
141
+ }
142
+
143
+ fn push ( & mut self , kind : SyntaxKind , len : usize , err : Option < & str > ) {
144
+ self . res . push ( kind, self . offset ) ;
145
+ self . offset += len;
146
+
147
+ if let Some ( err) = err {
148
+ let token = self . res . len ( ) as u32 ;
149
+ let msg = err. to_string ( ) ;
150
+ self . res . error . push ( LexError { msg, token } ) ;
151
+ }
152
+ }
153
+
154
+ fn extend_token ( & mut self , kind : & rustc_lexer:: TokenKind , token_text : & str ) {
155
+ // A note on an intended tradeoff:
156
+ // We drop some useful information here (see patterns with double dots `..`)
157
+ // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
158
+ // being `u16` that come from `rowan::SyntaxKind`.
159
+ let mut err = "" ;
160
+
161
+ let syntax_kind = {
162
+ match kind {
163
+ rustc_lexer:: TokenKind :: LineComment { doc_style : _ } => COMMENT ,
164
+ rustc_lexer:: TokenKind :: BlockComment { doc_style : _, terminated } => {
165
+ if !terminated {
166
+ err = "Missing trailing `*/` symbols to terminate the block comment" ;
167
+ }
168
+ COMMENT
148
169
}
149
- COMMENT
150
- }
151
170
152
- rustc_lexer:: TokenKind :: Whitespace => WHITESPACE ,
171
+ rustc_lexer:: TokenKind :: Whitespace => WHITESPACE ,
153
172
154
- rustc_lexer:: TokenKind :: Ident if token_text == "_" => UNDERSCORE ,
155
- rustc_lexer:: TokenKind :: Ident => SyntaxKind :: from_keyword ( token_text) . unwrap_or ( IDENT ) ,
173
+ rustc_lexer:: TokenKind :: Ident if token_text == "_" => UNDERSCORE ,
174
+ rustc_lexer:: TokenKind :: Ident => {
175
+ SyntaxKind :: from_keyword ( token_text) . unwrap_or ( IDENT )
176
+ }
156
177
157
- rustc_lexer:: TokenKind :: RawIdent => IDENT ,
158
- rustc_lexer:: TokenKind :: Literal { kind, .. } => return from_rustc_literal ( kind) ,
178
+ rustc_lexer:: TokenKind :: RawIdent => IDENT ,
179
+ rustc_lexer:: TokenKind :: Literal { kind, .. } => {
180
+ self . extend_literal ( token_text. len ( ) , kind) ;
181
+ return ;
182
+ }
159
183
160
- rustc_lexer:: TokenKind :: Lifetime { starts_with_number } => {
161
- if * starts_with_number {
162
- err = "Lifetime name cannot start with a number" ;
184
+ rustc_lexer:: TokenKind :: Lifetime { starts_with_number } => {
185
+ if * starts_with_number {
186
+ err = "Lifetime name cannot start with a number" ;
187
+ }
188
+ LIFETIME_IDENT
163
189
}
164
- LIFETIME_IDENT
165
- }
166
190
167
- rustc_lexer:: TokenKind :: Semi => T ! [ ; ] ,
168
- rustc_lexer:: TokenKind :: Comma => T ! [ , ] ,
169
- rustc_lexer:: TokenKind :: Dot => T ! [ . ] ,
170
- rustc_lexer:: TokenKind :: OpenParen => T ! [ '(' ] ,
171
- rustc_lexer:: TokenKind :: CloseParen => T ! [ ')' ] ,
172
- rustc_lexer:: TokenKind :: OpenBrace => T ! [ '{' ] ,
173
- rustc_lexer:: TokenKind :: CloseBrace => T ! [ '}' ] ,
174
- rustc_lexer:: TokenKind :: OpenBracket => T ! [ '[' ] ,
175
- rustc_lexer:: TokenKind :: CloseBracket => T ! [ ']' ] ,
176
- rustc_lexer:: TokenKind :: At => T ! [ @] ,
177
- rustc_lexer:: TokenKind :: Pound => T ! [ #] ,
178
- rustc_lexer:: TokenKind :: Tilde => T ! [ ~] ,
179
- rustc_lexer:: TokenKind :: Question => T ! [ ?] ,
180
- rustc_lexer:: TokenKind :: Colon => T ! [ : ] ,
181
- rustc_lexer:: TokenKind :: Dollar => T ! [ $] ,
182
- rustc_lexer:: TokenKind :: Eq => T ! [ =] ,
183
- rustc_lexer:: TokenKind :: Bang => T ! [ !] ,
184
- rustc_lexer:: TokenKind :: Lt => T ! [ <] ,
185
- rustc_lexer:: TokenKind :: Gt => T ! [ >] ,
186
- rustc_lexer:: TokenKind :: Minus => T ! [ -] ,
187
- rustc_lexer:: TokenKind :: And => T ! [ & ] ,
188
- rustc_lexer:: TokenKind :: Or => T ! [ |] ,
189
- rustc_lexer:: TokenKind :: Plus => T ! [ +] ,
190
- rustc_lexer:: TokenKind :: Star => T ! [ * ] ,
191
- rustc_lexer:: TokenKind :: Slash => T ! [ /] ,
192
- rustc_lexer:: TokenKind :: Caret => T ! [ ^] ,
193
- rustc_lexer:: TokenKind :: Percent => T ! [ %] ,
194
- rustc_lexer:: TokenKind :: Unknown => ERROR ,
195
- }
196
- } ;
191
+ rustc_lexer:: TokenKind :: Semi => T ! [ ; ] ,
192
+ rustc_lexer:: TokenKind :: Comma => T ! [ , ] ,
193
+ rustc_lexer:: TokenKind :: Dot => T ! [ . ] ,
194
+ rustc_lexer:: TokenKind :: OpenParen => T ! [ '(' ] ,
195
+ rustc_lexer:: TokenKind :: CloseParen => T ! [ ')' ] ,
196
+ rustc_lexer:: TokenKind :: OpenBrace => T ! [ '{' ] ,
197
+ rustc_lexer:: TokenKind :: CloseBrace => T ! [ '}' ] ,
198
+ rustc_lexer:: TokenKind :: OpenBracket => T ! [ '[' ] ,
199
+ rustc_lexer:: TokenKind :: CloseBracket => T ! [ ']' ] ,
200
+ rustc_lexer:: TokenKind :: At => T ! [ @] ,
201
+ rustc_lexer:: TokenKind :: Pound => T ! [ #] ,
202
+ rustc_lexer:: TokenKind :: Tilde => T ! [ ~] ,
203
+ rustc_lexer:: TokenKind :: Question => T ! [ ?] ,
204
+ rustc_lexer:: TokenKind :: Colon => T ! [ : ] ,
205
+ rustc_lexer:: TokenKind :: Dollar => T ! [ $] ,
206
+ rustc_lexer:: TokenKind :: Eq => T ! [ =] ,
207
+ rustc_lexer:: TokenKind :: Bang => T ! [ !] ,
208
+ rustc_lexer:: TokenKind :: Lt => T ! [ <] ,
209
+ rustc_lexer:: TokenKind :: Gt => T ! [ >] ,
210
+ rustc_lexer:: TokenKind :: Minus => T ! [ -] ,
211
+ rustc_lexer:: TokenKind :: And => T ! [ & ] ,
212
+ rustc_lexer:: TokenKind :: Or => T ! [ |] ,
213
+ rustc_lexer:: TokenKind :: Plus => T ! [ +] ,
214
+ rustc_lexer:: TokenKind :: Star => T ! [ * ] ,
215
+ rustc_lexer:: TokenKind :: Slash => T ! [ /] ,
216
+ rustc_lexer:: TokenKind :: Caret => T ! [ ^] ,
217
+ rustc_lexer:: TokenKind :: Percent => T ! [ %] ,
218
+ rustc_lexer:: TokenKind :: Unknown => ERROR ,
219
+ }
220
+ } ;
197
221
198
- let err = if err. is_empty ( ) { None } else { Some ( err) } ;
199
- ( syntax_kind, err)
200
- }
222
+ let err = if err. is_empty ( ) { None } else { Some ( err) } ;
223
+ self . push ( syntax_kind, token_text . len ( ) , err) ;
224
+ }
201
225
202
- fn from_rustc_literal ( kind : & rustc_lexer:: LiteralKind ) -> ( SyntaxKind , Option < & ' static str > ) {
203
- let mut err = "" ;
226
+ fn extend_literal ( & mut self , len : usize , kind : & rustc_lexer:: LiteralKind ) {
227
+ let mut err = "" ;
204
228
205
- let syntax_kind = match * kind {
206
- rustc_lexer:: LiteralKind :: Int { empty_int, base : _ } => {
207
- if empty_int {
208
- err = "Missing digits after the integer base prefix" ;
229
+ let syntax_kind = match * kind {
230
+ rustc_lexer:: LiteralKind :: Int { empty_int, base : _ } => {
231
+ if empty_int {
232
+ err = "Missing digits after the integer base prefix" ;
233
+ }
234
+ INT_NUMBER
209
235
}
210
- INT_NUMBER
211
- }
212
- rustc_lexer :: LiteralKind :: Float { empty_exponent , base : _ } => {
213
- if empty_exponent {
214
- err = "Missing digits after the exponent symbol" ;
236
+ rustc_lexer :: LiteralKind :: Float { empty_exponent , base : _ } => {
237
+ if empty_exponent {
238
+ err = "Missing digits after the exponent symbol" ;
239
+ }
240
+ FLOAT_NUMBER
215
241
}
216
- FLOAT_NUMBER
217
- }
218
- rustc_lexer :: LiteralKind :: Char { terminated } => {
219
- if !terminated {
220
- err = "Missing trailing `'` symbol to terminate the character literal" ;
242
+ rustc_lexer :: LiteralKind :: Char { terminated } => {
243
+ if !terminated {
244
+ err = "Missing trailing `'` symbol to terminate the character literal" ;
245
+ }
246
+ CHAR
221
247
}
222
- CHAR
223
- }
224
- rustc_lexer :: LiteralKind :: Byte { terminated } => {
225
- if !terminated {
226
- err = "Missing trailing `'` symbol to terminate the byte literal" ;
248
+ rustc_lexer :: LiteralKind :: Byte { terminated } => {
249
+ if !terminated {
250
+ err = "Missing trailing `'` symbol to terminate the byte literal" ;
251
+ }
252
+ BYTE
227
253
}
228
- BYTE
229
- }
230
- rustc_lexer :: LiteralKind :: Str { terminated } => {
231
- if !terminated {
232
- err = "Missing trailing ` \" ` symbol to terminate the string literal" ;
254
+ rustc_lexer :: LiteralKind :: Str { terminated } => {
255
+ if !terminated {
256
+ err = "Missing trailing ` \" ` symbol to terminate the string literal" ;
257
+ }
258
+ STRING
233
259
}
234
- STRING
235
- }
236
- rustc_lexer :: LiteralKind :: ByteStr { terminated } => {
237
- if !terminated {
238
- err = "Missing trailing ` \" ` symbol to terminate the byte string literal" ;
260
+ rustc_lexer :: LiteralKind :: ByteStr { terminated } => {
261
+ if !terminated {
262
+ err = "Missing trailing ` \" ` symbol to terminate the byte string literal" ;
263
+ }
264
+ BYTE_STRING
239
265
}
240
- BYTE_STRING
241
- }
242
- rustc_lexer:: LiteralKind :: RawStr { err : raw_str_err, .. } => {
243
- if let Some ( raw_str_err) = raw_str_err {
244
- err = match raw_str_err {
245
- rustc_lexer:: RawStrError :: InvalidStarter { .. } => "Missing `\" ` symbol after `#` symbols to begin the raw string literal" ,
246
- rustc_lexer:: RawStrError :: NoTerminator { expected, found, .. } => if expected == found {
247
- "Missing trailing `\" ` to terminate the raw string literal"
248
- } else {
249
- "Missing trailing `\" ` with `#` symbols to terminate the raw string literal"
250
- } ,
251
- rustc_lexer:: RawStrError :: TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols" ,
266
+ rustc_lexer:: LiteralKind :: RawStr { err : raw_str_err, .. } => {
267
+ if let Some ( raw_str_err) = raw_str_err {
268
+ err = match raw_str_err {
269
+ rustc_lexer:: RawStrError :: InvalidStarter { .. } => "Missing `\" ` symbol after `#` symbols to begin the raw string literal" ,
270
+ rustc_lexer:: RawStrError :: NoTerminator { expected, found, .. } => if expected == found {
271
+ "Missing trailing `\" ` to terminate the raw string literal"
272
+ } else {
273
+ "Missing trailing `\" ` with `#` symbols to terminate the raw string literal"
274
+ } ,
275
+ rustc_lexer:: RawStrError :: TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols" ,
276
+ } ;
252
277
} ;
253
- } ;
254
- STRING
255
- }
256
- rustc_lexer :: LiteralKind :: RawByteStr { err : raw_str_err , .. } => {
257
- if let Some ( raw_str_err ) = raw_str_err {
258
- err = match raw_str_err {
259
- rustc_lexer:: RawStrError :: InvalidStarter { .. } => "Missing ` \" ` symbol after `#` symbols to begin the raw byte string literal" ,
260
- rustc_lexer :: RawStrError :: NoTerminator { expected , found , .. } => if expected == found {
261
- "Missing trailing ` \" ` to terminate the raw byte string literal"
262
- } else {
263
- "Missing trailing ` \" ` with `#` symbols to terminate the raw byte string literal"
264
- } ,
265
- rustc_lexer :: RawStrError :: TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols" ,
278
+ STRING
279
+ }
280
+ rustc_lexer :: LiteralKind :: RawByteStr { err : raw_str_err , .. } => {
281
+ if let Some ( raw_str_err ) = raw_str_err {
282
+ err = match raw_str_err {
283
+ rustc_lexer :: RawStrError :: InvalidStarter { .. } => "Missing ` \" ` symbol after `#` symbols to begin the raw byte string literal" ,
284
+ rustc_lexer:: RawStrError :: NoTerminator { expected , found , .. } => if expected == found {
285
+ "Missing trailing ` \" ` to terminate the raw byte string literal"
286
+ } else {
287
+ "Missing trailing ` \" ` with `#` symbols to terminate the raw byte string literal"
288
+ } ,
289
+ rustc_lexer :: RawStrError :: TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols" ,
290
+ } ;
266
291
} ;
267
- } ;
268
292
269
- BYTE_STRING
270
- }
271
- } ;
293
+ BYTE_STRING
294
+ }
295
+ } ;
272
296
273
- let err = if err. is_empty ( ) { None } else { Some ( err) } ;
274
- ( syntax_kind, err)
297
+ let err = if err. is_empty ( ) { None } else { Some ( err) } ;
298
+ self . push ( syntax_kind, len, err) ;
299
+ }
275
300
}
0 commit comments