@@ -21,7 +21,7 @@ use exec::{Exec, ExecNoSync};
21
21
use expand:: expand_bytes;
22
22
use error:: Error ;
23
23
use re_builder:: bytes:: RegexBuilder ;
24
- use re_trait:: { self , RegularExpression , Locations , SubCapturesPosIter } ;
24
+ use re_trait:: { self , RegularExpression , SubCapturesPosIter } ;
25
25
26
26
/// Match represents a single match of a regex in a haystack.
27
27
///
@@ -252,10 +252,10 @@ impl Regex {
252
252
/// The `0`th capture group is always unnamed, so it must always be
253
253
/// accessed with `get(0)` or `[0]`.
254
254
pub fn captures < ' t > ( & self , text : & ' t [ u8 ] ) -> Option < Captures < ' t > > {
255
- let mut locs = self . locations ( ) ;
256
- self . read_captures_at ( & mut locs, text, 0 ) . map ( |_| Captures {
255
+ let mut locs = self . capture_locations ( ) ; // empty locations, filled in by the search below
256
+ self . captures_read_at ( & mut locs, text, 0 ) . map ( move |_| Captures { // the overall `Match` is ignored (`|_|`); `move` lets the closure take ownership of `locs`
257
257
text : text,
258
- locs : locs,
258
+ locs : locs. 0 , // unwrap the `CaptureLocations` newtype: `Captures` stores the inner `re_trait::Locations`
259
259
named_groups : self . 0 . capture_name_idx ( ) . clone ( ) ,
260
260
} )
261
261
}
@@ -568,7 +568,6 @@ impl Regex {
568
568
/// The significance of the starting point is that it takes the surrounding
569
569
/// context into consideration. For example, the `\A` anchor can only
570
570
/// match when `start == 0`.
571
- #[ doc( hidden) ]
572
571
pub fn shortest_match_at (
573
572
& self ,
574
573
text : & [ u8 ] ,
@@ -583,7 +582,6 @@ impl Regex {
583
582
/// The significance of the starting point is that it takes the surrounding
584
583
/// context into consideration. For example, the `\A` anchor can only
585
584
/// match when `start == 0`.
586
- #[ doc( hidden) ]
587
585
pub fn is_match_at ( & self , text : & [ u8 ] , start : usize ) -> bool {
588
586
self . shortest_match_at ( text, start) . is_some ( ) // only whether a match exists matters; the value inside `Some` is dropped
589
587
}
@@ -594,7 +592,6 @@ impl Regex {
594
592
/// The significance of the starting point is that it takes the surrounding
595
593
/// context into consideration. For example, the `\A` anchor can only
596
594
/// match when `start == 0`.
597
- #[ doc( hidden) ]
598
595
pub fn find_at < ' t > (
599
596
& self ,
600
597
text : & ' t [ u8 ] ,
@@ -604,21 +601,55 @@ impl Regex {
604
601
. map ( |( s, e) | Match :: new ( text, s, e) )
605
602
}
606
603
607
- /// Returns the same as captures, but starts the search at the given
604
+ /// This is like `captures`, but uses
605
+ /// [`CaptureLocations`](struct.CaptureLocations.html)
606
+ /// instead of
607
+ /// [`Captures`](struct.Captures.html) in order to amortize allocations.
608
+ ///
609
+ /// To create a `CaptureLocations` value, use the
610
+ /// `Regex::capture_locations` method.
611
+ ///
612
+ /// This returns the overall match if the search was successful, which is always
613
+ /// equivalent to the `0`th capture group.
614
+ pub fn captures_read < ' t > (
615
+ & self ,
616
+ locs : & mut CaptureLocations ,
617
+ text : & ' t [ u8 ] ,
618
+ ) -> Option < Match < ' t > > {
619
+ self . captures_read_at ( locs, text, 0 ) // start offset 0: same search as `captures_read_at` from the beginning of `text`
620
+ }
621
+
622
+ /// Returns the same as `captures_read`, but starts the search at the given
608
623
/// offset and populates the given capture locations.
609
624
///
610
625
/// The significance of the starting point is that it takes the surrounding
611
626
/// context into consideration. For example, the `\A` anchor can only
612
627
/// match when `start == 0`.
628
+ pub fn captures_read_at < ' t > (
629
+ & self ,
630
+ locs : & mut CaptureLocations ,
631
+ text : & ' t [ u8 ] ,
632
+ start : usize ,
633
+ ) -> Option < Match < ' t > > {
634
+ self . 0
635
+ . searcher ( )
636
+ . captures_read_at ( & mut locs. 0 , text, start) // the searcher takes the inner `re_trait::Locations`, hence `locs.0`
637
+ . map ( |( s, e) | Match :: new ( text, s, e) ) // wrap the returned offset pair in a `Match` that borrows `text`
638
+ }
639
+
640
+ /// An undocumented alias for `captures_read_at`.
641
+ ///
642
+ /// The `regex-capi` crate previously used this routine, so to avoid
643
+ /// breaking that crate, we continue to provide the name as an undocumented
644
+ /// alias.
613
645
#[ doc( hidden) ] // keeps this compatibility alias out of the generated documentation
614
646
pub fn read_captures_at < ' t > (
615
647
& self ,
616
- locs : & mut Locations ,
648
+ locs : & mut CaptureLocations ,
617
649
text : & ' t [ u8 ] ,
618
650
start : usize ,
619
651
) -> Option < Match < ' t > > {
620
- self . 0 . searcher ( ) . read_captures_at ( locs, text, start)
621
- . map ( |( s, e) | Match :: new ( text, s, e) )
652
+ self . captures_read_at ( locs, text, start) // pure forwarding: all arguments are passed through unchanged
622
653
}
623
654
}
624
655
@@ -639,11 +670,19 @@ impl Regex {
639
670
self . 0 . capture_names ( ) . len ( )
640
671
}
641
672
642
- /// Returns an empty set of locations that can be reused in multiple calls
643
- /// to `read_captures`.
673
+ /// Returns an empty set of capture locations that can be reused in
674
+ /// multiple calls to `captures_read` or `captures_read_at`.
675
+ pub fn capture_locations ( & self ) -> CaptureLocations {
676
+ CaptureLocations ( self . 0 . searcher ( ) . locations ( ) ) // wrap the searcher's locations in the public newtype
677
+ }
678
+
679
+ /// An alias for `capture_locations` to preserve backward compatibility.
680
+ ///
681
+ /// The `regex-capi` crate uses this method, so to avoid breaking that
682
+ /// crate, we continue to export it as an undocumented API.
644
683
#[ doc( hidden) ] // keeps this compatibility alias out of the generated documentation
645
- pub fn locations ( & self ) -> Locations {
646
- self . 0 . searcher ( ) . locations ( )
684
+ pub fn locations ( & self ) -> CaptureLocations {
685
+ CaptureLocations ( self . 0 . searcher ( ) . locations ( ) ) // same construction as `capture_locations`
647
686
}
648
687
}
649
688
@@ -769,6 +808,63 @@ impl<'r> Iterator for CaptureNames<'r> {
769
808
}
770
809
}
771
810
811
+ /// CaptureLocations is a low level representation of the raw offsets of each
812
+ /// submatch.
813
+ ///
814
+ /// You can think of this as a lower level
815
+ /// [`Captures`](struct.Captures.html), where this type does not support
816
+ /// named capturing groups directly and it does not borrow the text that these
817
+ /// offsets were matched on.
818
+ ///
819
+ /// Primarily, this type is useful when using the lower level `Regex` APIs
820
+ /// such as `captures_read`, which permits amortizing the allocation in which
821
+ /// capture match locations are stored.
822
+ ///
823
+ /// In order to build a value of this type, you'll need to call the
824
+ /// `capture_locations` method on the `Regex` being used to execute the search.
825
+ /// The value returned can then be reused in subsequent searches.
826
+ #[ derive( Clone , Debug ) ]
827
+ pub struct CaptureLocations ( re_trait:: Locations ) ; // newtype over the internal `re_trait::Locations`; the field is not `pub`
828
+
829
+ /// A type alias for `CaptureLocations` for backwards compatibility.
830
+ ///
831
+ /// Previously, this type was exported under the name `Locations` in an
832
+ /// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
833
+ /// we continue re-exporting the same undocumented API.
834
+ #[ doc( hidden) ]
835
+ pub type Locations = CaptureLocations ;
836
+
837
+ impl CaptureLocations {
838
+ /// Returns the start and end positions of the `i`th capture group. Returns
839
+ /// `None` if `i` is not a valid capture group or if the capture group did
840
+ /// not match anything. The positions returned are *always* byte indices
841
+ /// with respect to the original text matched.
842
+ #[ inline]
843
+ pub fn get ( & self , i : usize ) -> Option < ( usize , usize ) > {
844
+ self . 0 . pos ( i) // delegate to the wrapped `re_trait::Locations`
845
+ }
846
+
847
+ /// Returns the total number of capturing groups.
848
+ ///
849
+ /// This is always at least `1` since every regex has at least `1`
850
+ /// capturing group that corresponds to the entire match.
851
+ #[ inline]
852
+ pub fn len ( & self ) -> usize {
853
+ self . 0 . len ( ) // delegate to the wrapped `re_trait::Locations`
854
+ }
855
+
856
+ /// An alias for the `get` method for backwards compatibility.
857
+ ///
858
+ /// Previously, `get` was exported as `pos` in an undocumented API. To
859
+ /// prevent breaking that code (e.g., in `regex-capi`), we continue
860
+ /// re-exporting the same undocumented API.
861
+ #[ doc( hidden) ]
862
+ #[ inline]
863
+ pub fn pos ( & self , i : usize ) -> Option < ( usize , usize ) > {
864
+ self . get ( i) // forward to `get` so both names always agree
865
+ }
866
+ }
867
+
772
868
/// Captures represents a group of captured byte strings for a single match.
773
869
///
774
870
/// The 0th capture always corresponds to the entire match. Each subsequent
@@ -782,7 +878,7 @@ impl<'r> Iterator for CaptureNames<'r> {
782
878
/// `'t` is the lifetime of the matched text.
783
879
pub struct Captures < ' t > {
784
880
text : & ' t [ u8 ] , // the matched text, borrowed for `'t`
785
- locs : Locations ,
881
+ locs : re_trait :: Locations , // the internal location type, not the public `CaptureLocations` wrapper
786
882
named_groups : Arc < HashMap < String , usize > > , // presumably capture-group name -> group index; populated from `capture_name_idx()` in `Regex::captures`
787
883
}
788
884
0 commit comments