@@ -471,10 +471,12 @@ impl Hir {
471
471
472
472
/// Returns an HIR expression for `.`.
473
473
///
474
- /// * [`Dot::AnyChar`] maps to `(?su:.)`.
475
- /// * [`Dot::AnyByte`] maps to `(?s-u:.)`.
476
- /// * [`Dot::AnyCharExceptNL`] maps to `(?u-s:.)`.
477
- /// * [`Dot::AnyByteExceptNL`] maps to `(?-su:.)`.
474
+ /// * [`Dot::AnyChar`] maps to `(?su-R:.)`.
475
+ /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`.
476
+ /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`.
477
+ /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`.
478
+ /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`.
479
+ /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`.
478
480
///
479
481
/// Note that this is a convenience routine for constructing the correct
480
482
/// character class based on the value of `Dot`. There is no explicit "dot"
@@ -492,18 +494,32 @@ impl Hir {
492
494
cls. push ( ClassBytesRange :: new ( b'\0' , b'\xFF' ) ) ;
493
495
Hir :: class ( Class :: Bytes ( cls) )
494
496
}
495
- Dot :: AnyCharExceptNL => {
497
+ Dot :: AnyCharExceptLF => {
496
498
let mut cls = ClassUnicode :: empty ( ) ;
497
499
cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
498
500
cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\u{10FFFF}' ) ) ;
499
501
Hir :: class ( Class :: Unicode ( cls) )
500
502
}
501
- Dot :: AnyByteExceptNL => {
503
+ Dot :: AnyCharExceptCRLF => {
504
+ let mut cls = ClassUnicode :: empty ( ) ;
505
+ cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
506
+ cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\x0C' ) ) ;
507
+ cls. push ( ClassUnicodeRange :: new ( '\x0E' , '\u{10FFFF}' ) ) ;
508
+ Hir :: class ( Class :: Unicode ( cls) )
509
+ }
510
+ Dot :: AnyByteExceptLF => {
502
511
let mut cls = ClassBytes :: empty ( ) ;
503
512
cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
504
513
cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\xFF' ) ) ;
505
514
Hir :: class ( Class :: Bytes ( cls) )
506
515
}
516
+ Dot :: AnyByteExceptCRLF => {
517
+ let mut cls = ClassBytes :: empty ( ) ;
518
+ cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
519
+ cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\x0C' ) ) ;
520
+ cls. push ( ClassBytesRange :: new ( b'\x0E' , b'\xFF' ) ) ;
521
+ Hir :: class ( Class :: Bytes ( cls) )
522
+ }
507
523
}
508
524
}
509
525
}
@@ -1365,6 +1381,16 @@ pub enum Look {
1365
1381
/// at the end position of the input, or at the position immediately
1366
1382
/// preceding a `\n` character.
1367
1383
EndLF ,
1384
+ /// Match the beginning of a line or the beginning of text. Specifically,
1385
+ /// this matches at the starting position of the input, or at the position
1386
+ /// immediately following either a `\r` or `\n` character, but never after
1387
+ /// a `\r` when a `\n` follows.
1388
+ StartCRLF ,
1389
+ /// Match the end of a line or the end of text. Specifically, this matches
1390
+ /// at the end position of the input, or at the position immediately
1391
+ /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
1392
+ /// precedes it.
1393
+ EndCRLF ,
1368
1394
/// Match an ASCII-only word boundary. That is, this matches a position
1369
1395
/// where the left adjacent character and right adjacent character
1370
1396
/// correspond to a word and non-word or a non-word and word character.
@@ -1380,30 +1406,34 @@ pub enum Look {
1380
1406
}
1381
1407
1382
1408
impl Look {
/// Converts a numeric representation back into its `Look` variant.
///
/// Returns `None` when `repr` does not correspond to any assertion. The
/// values accepted here are the ones `as_repr` is expected to produce, so
/// the two tables must be kept in lockstep whenever a variant is added.
1383
- fn from_repr ( repr : u8 ) -> Option < Look > {
1409
+ fn from_repr ( repr : u16 ) -> Option < Look > {
1384
1410
match repr {
1385
1411
0 => Some ( Look :: Start ) ,
1386
1412
1 => Some ( Look :: End ) ,
1387
1413
2 => Some ( Look :: StartLF ) ,
1388
1414
3 => Some ( Look :: EndLF ) ,
1389
- 4 => Some ( Look :: WordAscii ) ,
1390
- 5 => Some ( Look :: WordAsciiNegate ) ,
1391
- 6 => Some ( Look :: WordUnicode ) ,
1392
- 7 => Some ( Look :: WordUnicodeNegate ) ,
// The CRLF-aware line anchors take 4 and 5; the word-boundary
// assertions are shifted up by two to make room.
1415
+ 4 => Some ( Look :: StartCRLF ) ,
1416
+ 5 => Some ( Look :: EndCRLF ) ,
1417
+ 6 => Some ( Look :: WordAscii ) ,
1418
+ 7 => Some ( Look :: WordAsciiNegate ) ,
1419
+ 8 => Some ( Look :: WordUnicode ) ,
1420
+ 9 => Some ( Look :: WordUnicodeNegate ) ,
1393
1421
// Any other value has no corresponding assertion.
_ => None ,
1394
1422
}
1395
1423
}
1396
1424
/// Returns the numeric representation of this look-around assertion.
///
/// Every variant maps to a distinct value, and that value is exactly the
/// one `from_repr` maps back to the same variant. Two variants sharing a
/// value would make them indistinguishable after a round trip.
1397
- fn as_repr ( & self ) -> u8 {
1425
+ fn as_repr ( & self ) -> u16 {
1398
1426
match * self {
1399
1427
Look :: Start => 0 ,
1400
1428
Look :: End => 1 ,
1401
1429
Look :: StartLF => 2 ,
1402
1430
Look :: EndLF => 3 ,
1403
- Look :: WordAscii => 4 ,
1404
- Look :: WordAsciiNegate => 5 ,
1405
- Look :: WordUnicode => 6 ,
1406
- Look :: WordUnicodeNegate => 7 ,
// StartCRLF must be 4 (not 5): `from_repr` decodes 4 as StartCRLF and
// 5 as EndCRLF, so each needs its own value.
1431
+ Look :: StartCRLF => 4 ,
1432
+ Look :: EndCRLF => 5 ,
1433
+ Look :: WordAscii => 6 ,
1434
+ Look :: WordAsciiNegate => 7 ,
1435
+ Look :: WordUnicode => 8 ,
1436
+ Look :: WordUnicodeNegate => 9 ,
1407
1437
}
1408
1438
}
1409
1439
@@ -1413,6 +1443,8 @@ impl Look {
1413
1443
Look :: End => 'z' ,
1414
1444
Look :: StartLF => '^' ,
1415
1445
Look :: EndLF => '$' ,
1446
+ Look :: StartCRLF => '^' ,
1447
+ Look :: EndCRLF => '$' ,
1416
1448
Look :: WordAscii => 'b' ,
1417
1449
Look :: WordAsciiNegate => 'B' ,
1418
1450
Look :: WordUnicode => '𝛃' ,
@@ -1505,11 +1537,20 @@ pub enum Dot {
1505
1537
/// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`.
1506
1538
///
1507
1539
/// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`.
1508
- AnyCharExceptNL ,
1540
+ AnyCharExceptLF ,
1541
+ /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r`
1542
+ /// and `\n`.
1543
+ ///
1544
+ /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`.
1545
+ AnyCharExceptCRLF ,
1509
1546
/// Matches any byte value except for `\n`.
1510
1547
///
1511
1548
/// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`.
1512
- AnyByteExceptNL ,
1549
+ AnyByteExceptLF ,
1550
+ /// Matches any byte value except for `\r` and `\n`.
1551
+ ///
1552
+ /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`.
1553
+ AnyByteExceptCRLF ,
1513
1554
}
1514
1555
1515
1556
/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
@@ -2038,7 +2079,7 @@ impl Properties {
2038
2079
/// example, an [`Hir`] provides properties that return `LookSet`s.
2039
2080
#[ derive( Clone , Copy , Default , Eq , PartialEq ) ]
2040
2081
pub struct LookSet {
// Backing bit field for the set. `LookSetIter::next` takes the index of
// the lowest set bit and decodes it with `Look::from_repr`, so this
// integer must be wide enough to hold one bit per repr value (0 through 9
// in this revision, hence the move from u8 to u16).
2041
- bits : u8 ,
2082
+ bits : u16 ,
2042
2083
}
2043
2084
2044
2085
impl LookSet {
@@ -2170,8 +2211,8 @@ impl Iterator for LookSetIter {
2170
2211
#[ inline]
2171
2212
fn next ( & mut self ) -> Option < Look > {
2172
2213
// We'll never have more than u8::MAX distinct look-around assertions,
2173
- // so 'repr' will always fit into a usize .
2174
- let repr = u8 :: try_from ( self . set . bits . trailing_zeros ( ) ) . unwrap ( ) ;
2214
+ // so 'repr' will always fit into a u16 .
2215
+ let repr = u16 :: try_from ( self . set . bits . trailing_zeros ( ) ) . unwrap ( ) ;
2175
2216
let look = Look :: from_repr ( repr) ?;
2176
2217
self . set . remove ( look) ;
2177
2218
Some ( look)
0 commit comments