@@ -18,7 +18,7 @@ use alloc::{
18
18
use crate :: {
19
19
ast:: { self , Ast , Position , Span } ,
20
20
either:: Either ,
21
- is_meta_character,
21
+ is_escapeable_character , is_meta_character,
22
22
} ;
23
23
24
24
type Result < T > = core:: result:: Result < T , ast:: Error > ;
@@ -1495,7 +1495,14 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
1495
1495
if is_meta_character ( c) {
1496
1496
return Ok ( Primitive :: Literal ( ast:: Literal {
1497
1497
span,
1498
- kind : ast:: LiteralKind :: Punctuation ,
1498
+ kind : ast:: LiteralKind :: Meta ,
1499
+ c,
1500
+ } ) ) ;
1501
+ }
1502
+ if is_escapeable_character ( c) {
1503
+ return Ok ( Primitive :: Literal ( ast:: Literal {
1504
+ span,
1505
+ kind : ast:: LiteralKind :: Superfluous ,
1499
1506
c,
1500
1507
} ) ) ;
1501
1508
}
@@ -1513,9 +1520,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
1513
1520
'n' => special ( ast:: SpecialLiteralKind :: LineFeed , '\n' ) ,
1514
1521
'r' => special ( ast:: SpecialLiteralKind :: CarriageReturn , '\r' ) ,
1515
1522
'v' => special ( ast:: SpecialLiteralKind :: VerticalTab , '\x0B' ) ,
1516
- ' ' if self . ignore_whitespace ( ) => {
1517
- special ( ast:: SpecialLiteralKind :: Space , ' ' )
1518
- }
1519
1523
'A' => Ok ( Primitive :: Assertion ( ast:: Assertion {
1520
1524
span,
1521
1525
kind : ast:: AssertionKind :: StartText ,
@@ -2420,13 +2424,9 @@ mod tests {
2420
2424
lit_with ( c, span ( start..start + c. len_utf8 ( ) ) )
2421
2425
}
2422
2426
2423
- /// Create a punctuation literal starting at the given position.
2424
- fn punct_lit ( c : char , span : Span ) -> Ast {
2425
- Ast :: Literal ( ast:: Literal {
2426
- span,
2427
- kind : ast:: LiteralKind :: Punctuation ,
2428
- c,
2429
- } )
2427
+ /// Create a meta literal for the given character with the given span.
2428
+ fn meta_lit ( c : char , span : Span ) -> Ast {
2429
+ Ast :: Literal ( ast:: Literal { span, kind : ast:: LiteralKind :: Meta , c } )
2430
2430
}
2431
2431
2432
2432
/// Create a verbatim literal with the given span.
@@ -2710,24 +2710,24 @@ bar
2710
2710
Ok ( concat(
2711
2711
0 ..36 ,
2712
2712
vec![
2713
- punct_lit ( '\\' , span( 0 ..2 ) ) ,
2714
- punct_lit ( '.' , span( 2 ..4 ) ) ,
2715
- punct_lit ( '+' , span( 4 ..6 ) ) ,
2716
- punct_lit ( '*' , span( 6 ..8 ) ) ,
2717
- punct_lit ( '?' , span( 8 ..10 ) ) ,
2718
- punct_lit ( '(' , span( 10 ..12 ) ) ,
2719
- punct_lit ( ')' , span( 12 ..14 ) ) ,
2720
- punct_lit ( '|' , span( 14 ..16 ) ) ,
2721
- punct_lit ( '[' , span( 16 ..18 ) ) ,
2722
- punct_lit ( ']' , span( 18 ..20 ) ) ,
2723
- punct_lit ( '{' , span( 20 ..22 ) ) ,
2724
- punct_lit ( '}' , span( 22 ..24 ) ) ,
2725
- punct_lit ( '^' , span( 24 ..26 ) ) ,
2726
- punct_lit ( '$' , span( 26 ..28 ) ) ,
2727
- punct_lit ( '#' , span( 28 ..30 ) ) ,
2728
- punct_lit ( '&' , span( 30 ..32 ) ) ,
2729
- punct_lit ( '-' , span( 32 ..34 ) ) ,
2730
- punct_lit ( '~' , span( 34 ..36 ) ) ,
2713
+ meta_lit ( '\\' , span( 0 ..2 ) ) ,
2714
+ meta_lit ( '.' , span( 2 ..4 ) ) ,
2715
+ meta_lit ( '+' , span( 4 ..6 ) ) ,
2716
+ meta_lit ( '*' , span( 6 ..8 ) ) ,
2717
+ meta_lit ( '?' , span( 8 ..10 ) ) ,
2718
+ meta_lit ( '(' , span( 10 ..12 ) ) ,
2719
+ meta_lit ( ')' , span( 12 ..14 ) ) ,
2720
+ meta_lit ( '|' , span( 14 ..16 ) ) ,
2721
+ meta_lit ( '[' , span( 16 ..18 ) ) ,
2722
+ meta_lit ( ']' , span( 18 ..20 ) ) ,
2723
+ meta_lit ( '{' , span( 20 ..22 ) ) ,
2724
+ meta_lit ( '}' , span( 22 ..24 ) ) ,
2725
+ meta_lit ( '^' , span( 24 ..26 ) ) ,
2726
+ meta_lit ( '$' , span( 26 ..28 ) ) ,
2727
+ meta_lit ( '#' , span( 28 ..30 ) ) ,
2728
+ meta_lit ( '&' , span( 30 ..32 ) ) ,
2729
+ meta_lit ( '-' , span( 32 ..34 ) ) ,
2730
+ meta_lit ( '~' , span( 34 ..36 ) ) ,
2731
2731
]
2732
2732
) )
2733
2733
) ;
@@ -2879,23 +2879,12 @@ bar
2879
2879
flag_set( pat, 0 ..4 , ast:: Flag :: IgnoreWhitespace , false ) ,
2880
2880
Ast :: Literal ( ast:: Literal {
2881
2881
span: span_range( pat, 4 ..6 ) ,
2882
- kind: ast:: LiteralKind :: Special (
2883
- ast:: SpecialLiteralKind :: Space
2884
- ) ,
2882
+ kind: ast:: LiteralKind :: Superfluous ,
2885
2883
c: ' ' ,
2886
2884
} ) ,
2887
2885
]
2888
2886
) )
2889
2887
) ;
2890
- // ... but only when `x` mode is enabled.
2891
- let pat = r"\ " ;
2892
- assert_eq ! (
2893
- parser( pat) . parse( ) . unwrap_err( ) ,
2894
- TestError {
2895
- span: span_range( pat, 0 ..2 ) ,
2896
- kind: ast:: ErrorKind :: EscapeUnrecognized ,
2897
- }
2898
- ) ;
2899
2888
}
2900
2889
2901
2890
#[ test]
@@ -4246,7 +4235,7 @@ bar
4246
4235
parser( r"\|" ) . parse_primitive( ) ,
4247
4236
Ok ( Primitive :: Literal ( ast:: Literal {
4248
4237
span: span( 0 ..2 ) ,
4249
- kind: ast:: LiteralKind :: Punctuation ,
4238
+ kind: ast:: LiteralKind :: Meta ,
4250
4239
c: '|' ,
4251
4240
} ) )
4252
4241
) ;
@@ -4297,11 +4286,26 @@ bar
4297
4286
} ) )
4298
4287
) ;
4299
4288
4289
+ // Superfluous escapes are now supported in most cases as well.
4290
+ for c in [ '!' , '@' , '%' , '"' , '\'' , '/' , ' ' ] {
4291
+ let pat = format ! ( r"\{}" , c) ;
4292
+ assert_eq ! (
4293
+ parser( & pat) . parse_primitive( ) ,
4294
+ Ok ( Primitive :: Literal ( ast:: Literal {
4295
+ span: span( 0 ..2 ) ,
4296
+ kind: ast:: LiteralKind :: Superfluous ,
4297
+ c,
4298
+ } ) )
4299
+ ) ;
4300
+ }
4301
+
4302
+ // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This
4303
+ // gives flexibility for future evolution.
4300
4304
assert_eq ! (
4301
- parser( r"\" ) . parse_escape( ) . unwrap_err( ) ,
4305
+ parser( r"\e " ) . parse_escape( ) . unwrap_err( ) ,
4302
4306
TestError {
4303
- span: span( 0 ..1 ) ,
4304
- kind: ast:: ErrorKind :: EscapeUnexpectedEof ,
4307
+ span: span( 0 ..2 ) ,
4308
+ kind: ast:: ErrorKind :: EscapeUnrecognized ,
4305
4309
}
4306
4310
) ;
4307
4311
assert_eq ! (
@@ -4311,6 +4315,31 @@ bar
4311
4315
kind: ast:: ErrorKind :: EscapeUnrecognized ,
4312
4316
}
4313
4317
) ;
4318
+ // But also, < and > are banned, so that we may evolve them into
4319
+ // start/end word boundary assertions. (Not sure if we will...)
4320
+ assert_eq ! (
4321
+ parser( r"\<" ) . parse_escape( ) . unwrap_err( ) ,
4322
+ TestError {
4323
+ span: span( 0 ..2 ) ,
4324
+ kind: ast:: ErrorKind :: EscapeUnrecognized ,
4325
+ }
4326
+ ) ;
4327
+ assert_eq ! (
4328
+ parser( r"\>" ) . parse_escape( ) . unwrap_err( ) ,
4329
+ TestError {
4330
+ span: span( 0 ..2 ) ,
4331
+ kind: ast:: ErrorKind :: EscapeUnrecognized ,
4332
+ }
4333
+ ) ;
4334
+
4335
+ // An unfinished escape is illegal.
4336
+ assert_eq ! (
4337
+ parser( r"\" ) . parse_escape( ) . unwrap_err( ) ,
4338
+ TestError {
4339
+ span: span( 0 ..1 ) ,
4340
+ kind: ast:: ErrorKind :: EscapeUnexpectedEof ,
4341
+ }
4342
+ ) ;
4314
4343
}
4315
4344
4316
4345
#[ test]
@@ -4907,7 +4936,7 @@ bar
4907
4936
lit( span( 1 ..2 ) , 'a' ) ,
4908
4937
ast:: ClassSetItem :: Literal ( ast:: Literal {
4909
4938
span: span( 2 ..4 ) ,
4910
- kind: ast:: LiteralKind :: Punctuation ,
4939
+ kind: ast:: LiteralKind :: Meta ,
4911
4940
c: ']' ,
4912
4941
} ) ,
4913
4942
]
@@ -4925,7 +4954,7 @@ bar
4925
4954
lit( span( 1 ..2 ) , 'a' ) ,
4926
4955
ast:: ClassSetItem :: Literal ( ast:: Literal {
4927
4956
span: span( 2 ..4 ) ,
4928
- kind: ast:: LiteralKind :: Punctuation ,
4957
+ kind: ast:: LiteralKind :: Meta ,
4929
4958
c: '-' ,
4930
4959
} ) ,
4931
4960
lit( span( 4 ..5 ) , 'z' ) ,
@@ -5117,7 +5146,7 @@ bar
5117
5146
span( 1 ..6 ) ,
5118
5147
itemset( ast:: ClassSetItem :: Literal ( ast:: Literal {
5119
5148
span: span( 1 ..3 ) ,
5120
- kind: ast:: LiteralKind :: Punctuation ,
5149
+ kind: ast:: LiteralKind :: Meta ,
5121
5150
c: '^' ,
5122
5151
} ) ) ,
5123
5152
itemset( lit( span( 5 ..6 ) , '^' ) ) ,
@@ -5133,7 +5162,7 @@ bar
5133
5162
span( 1 ..6 ) ,
5134
5163
itemset( ast:: ClassSetItem :: Literal ( ast:: Literal {
5135
5164
span: span( 1 ..3 ) ,
5136
- kind: ast:: LiteralKind :: Punctuation ,
5165
+ kind: ast:: LiteralKind :: Meta ,
5137
5166
c: '&' ,
5138
5167
} ) ) ,
5139
5168
itemset( lit( span( 5 ..6 ) , '&' ) ) ,
@@ -5198,7 +5227,7 @@ bar
5198
5227
lit( span( 1 ..2 ) , ']' ) ,
5199
5228
ast:: ClassSetItem :: Literal ( ast:: Literal {
5200
5229
span: span( 2 ..4 ) ,
5201
- kind: ast:: LiteralKind :: Punctuation ,
5230
+ kind: ast:: LiteralKind :: Meta ,
5202
5231
c: '[' ,
5203
5232
} ) ,
5204
5233
]
@@ -5216,7 +5245,7 @@ bar
5216
5245
kind: itemset( ast:: ClassSetItem :: Literal (
5217
5246
ast:: Literal {
5218
5247
span: span( 1 ..3 ) ,
5219
- kind: ast:: LiteralKind :: Punctuation ,
5248
+ kind: ast:: LiteralKind :: Meta ,
5220
5249
c: '[' ,
5221
5250
}
5222
5251
) ) ,
0 commit comments