@@ -471,10 +471,12 @@ impl Hir {
471471
472472 /// Returns an HIR expression for `.`.
473473 ///
474- /// * [`Dot::AnyChar`] maps to `(?su:.)`.
475- /// * [`Dot::AnyByte`] maps to `(?s-u:.)`.
476- /// * [`Dot::AnyCharExceptNL`] maps to `(?u-s:.)`.
477- /// * [`Dot::AnyByteExceptNL`] maps to `(?-su:.)`.
474+ /// * [`Dot::AnyChar`] maps to `(?su-R:.)`.
475+ /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`.
476+ /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`.
477+ /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`.
478+ /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`.
479+ /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`.
478480 ///
479481 /// Note that this is a convenience routine for constructing the correct
480482 /// character class based on the value of `Dot`. There is no explicit "dot"
@@ -492,18 +494,32 @@ impl Hir {
492494 cls. push ( ClassBytesRange :: new ( b'\0' , b'\xFF' ) ) ;
493495 Hir :: class ( Class :: Bytes ( cls) )
494496 }
495- Dot :: AnyCharExceptNL => {
497+ Dot :: AnyCharExceptLF => {
496498 let mut cls = ClassUnicode :: empty ( ) ;
497499 cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
498500 cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\u{10FFFF}' ) ) ;
499501 Hir :: class ( Class :: Unicode ( cls) )
500502 }
501- Dot :: AnyByteExceptNL => {
503+ Dot :: AnyCharExceptCRLF => {
504+ let mut cls = ClassUnicode :: empty ( ) ;
505+ cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
506+ cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\x0C' ) ) ;
507+ cls. push ( ClassUnicodeRange :: new ( '\x0E' , '\u{10FFFF}' ) ) ;
508+ Hir :: class ( Class :: Unicode ( cls) )
509+ }
510+ Dot :: AnyByteExceptLF => {
502511 let mut cls = ClassBytes :: empty ( ) ;
503512 cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
504513 cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\xFF' ) ) ;
505514 Hir :: class ( Class :: Bytes ( cls) )
506515 }
516+ Dot :: AnyByteExceptCRLF => {
517+ let mut cls = ClassBytes :: empty ( ) ;
518+ cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
519+ cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\x0C' ) ) ;
520+ cls. push ( ClassBytesRange :: new ( b'\x0E' , b'\xFF' ) ) ;
521+ Hir :: class ( Class :: Bytes ( cls) )
522+ }
507523 }
508524 }
509525}
@@ -1365,6 +1381,16 @@ pub enum Look {
13651381 /// at the end position of the input, or at the position immediately
13661382 /// preceding a `\n` character.
13671383 EndLF ,
1384+ /// Match the beginning of a line or the beginning of text. Specifically,
1385+ /// this matches at the starting position of the input, or at the position
1386+ /// immediately following either a `\r` or `\n` character, but never after
1387+ /// a `\r` when a `\n` follows.
1388+ StartCRLF ,
1389+ /// Match the end of a line or the end of text. Specifically, this matches
1390+ /// at the end position of the input, or at the position immediately
1391+ /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
1392+ /// precedes it.
1393+ EndCRLF ,
13681394 /// Match an ASCII-only word boundary. That is, this matches a position
13691395 /// where the left adjacent character and right adjacent character
13701396 /// correspond to a word and non-word or a non-word and word character.
@@ -1380,30 +1406,34 @@ pub enum Look {
13801406}
13811407
13821408impl Look {
1383- fn from_repr ( repr : u8 ) -> Option < Look > {
1409+ fn from_repr ( repr : u16 ) -> Option < Look > {
13841410 match repr {
13851411 0 => Some ( Look :: Start ) ,
13861412 1 => Some ( Look :: End ) ,
13871413 2 => Some ( Look :: StartLF ) ,
13881414 3 => Some ( Look :: EndLF ) ,
1389- 4 => Some ( Look :: WordAscii ) ,
1390- 5 => Some ( Look :: WordAsciiNegate ) ,
1391- 6 => Some ( Look :: WordUnicode ) ,
1392- 7 => Some ( Look :: WordUnicodeNegate ) ,
1415+ 4 => Some ( Look :: StartCRLF ) ,
1416+ 5 => Some ( Look :: EndCRLF ) ,
1417+ 6 => Some ( Look :: WordAscii ) ,
1418+ 7 => Some ( Look :: WordAsciiNegate ) ,
1419+ 8 => Some ( Look :: WordUnicode ) ,
1420+ 9 => Some ( Look :: WordUnicodeNegate ) ,
13931421 _ => None ,
13941422 }
13951423 }
13961424
1397- fn as_repr ( & self ) -> u8 {
1425+ fn as_repr ( & self ) -> u16 {
13981426 match * self {
13991427 Look :: Start => 0 ,
14001428 Look :: End => 1 ,
14011429 Look :: StartLF => 2 ,
14021430 Look :: EndLF => 3 ,
1403- Look :: WordAscii => 4 ,
1404- Look :: WordAsciiNegate => 5 ,
1405- Look :: WordUnicode => 6 ,
1406- Look :: WordUnicodeNegate => 7 ,
1431+ Look :: StartCRLF => 5 ,
1432+ Look :: EndCRLF => 5 ,
1433+ Look :: WordAscii => 6 ,
1434+ Look :: WordAsciiNegate => 7 ,
1435+ Look :: WordUnicode => 8 ,
1436+ Look :: WordUnicodeNegate => 9 ,
14071437 }
14081438 }
14091439
@@ -1413,6 +1443,8 @@ impl Look {
14131443 Look :: End => 'z' ,
14141444 Look :: StartLF => '^' ,
14151445 Look :: EndLF => '$' ,
1446+ Look :: StartCRLF => '^' ,
1447+ Look :: EndCRLF => '$' ,
14161448 Look :: WordAscii => 'b' ,
14171449 Look :: WordAsciiNegate => 'B' ,
14181450 Look :: WordUnicode => '𝛃' ,
@@ -1505,11 +1537,20 @@ pub enum Dot {
15051537 /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`.
15061538 ///
15071539 /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`.
1508- AnyCharExceptNL ,
1540+ AnyCharExceptLF ,
1541+ /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r`
1542+ /// and `\n`.
1543+ ///
1544+ /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`.
1545+ AnyCharExceptCRLF ,
15091546 /// Matches any byte value except for `\n`.
15101547 ///
15111548 /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`.
1512- AnyByteExceptNL ,
1549+ AnyByteExceptLF ,
1550+ /// Matches any byte value except for `\r` and `\n`.
1551+ ///
1552+ /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`.
1553+ AnyByteExceptCRLF ,
15131554}
15141555
15151556/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
@@ -2038,7 +2079,7 @@ impl Properties {
20382079/// example, an [`Hir`] provides properties that return `LookSet`s.
20392080#[ derive( Clone , Copy , Default , Eq , PartialEq ) ]
20402081pub struct LookSet {
2041- bits : u8 ,
2082+ bits : u16 ,
20422083}
20432084
20442085impl LookSet {
@@ -2170,8 +2211,8 @@ impl Iterator for LookSetIter {
21702211 #[ inline]
21712212 fn next ( & mut self ) -> Option < Look > {
21722213 // We'll never have more than u8::MAX distinct look-around assertions,
2173- // so 'repr' will always fit into a usize .
2174- let repr = u8 :: try_from ( self . set . bits . trailing_zeros ( ) ) . unwrap ( ) ;
2214+ // so 'repr' will always fit into a u16 .
2215+ let repr = u16 :: try_from ( self . set . bits . trailing_zeros ( ) ) . unwrap ( ) ;
21752216 let look = Look :: from_repr ( repr) ?;
21762217 self . set . remove ( look) ;
21772218 Some ( look)
0 commit comments