@@ -680,7 +680,7 @@ impl CharClass {
680680 self . canonicalize ( )
681681 }
682682
683- /// Canonicalze any sequence of ranges.
683+ /// Canonicalize any sequence of ranges.
684684 ///
685685 /// This is responsible for enforcing the canonical format invariants
686686 /// as described on the docs for the `CharClass` type.
@@ -703,6 +703,43 @@ impl CharClass {
703703 ordered
704704 }
705705
706+ /// Calculate the intersection of two canonical character classes.
707+ ///
708+ /// The returned intersection is canonical.
709+ fn intersection ( & self , other : & CharClass ) -> CharClass {
710+ if self . ranges . is_empty ( ) || other. ranges . is_empty ( ) {
711+ return CharClass :: empty ( ) ;
712+ }
713+
714+ let mut intersection = CharClass :: empty ( ) ;
715+
716+ let mut iter_a = self . ranges . iter ( ) ;
717+ let mut iter_b = other. ranges . iter ( ) ;
718+ let mut a = iter_a. next ( ) . unwrap ( ) ;
719+ let mut b = iter_b. next ( ) . unwrap ( ) ;
720+ loop {
721+ if let Some ( i) = a. intersection ( & b) {
722+ intersection. ranges . push ( i) ;
723+ }
724+
725+ // If the range with the smaller end didn't match this time,
726+ // it won't ever match, so move on to the next one.
727+ let ( iter, item) = if a. end < b. end {
728+ ( & mut iter_a, & mut a)
729+ } else {
730+ ( & mut iter_b, & mut b)
731+ } ;
732+ if let Some ( v) = iter. next ( ) {
733+ * item = v;
734+ } else {
735+ // No more ranges to check, done.
736+ break ;
737+ }
738+ }
739+
740+ intersection. canonicalize ( )
741+ }
742+
706743 /// Negates the character class.
707744 ///
708745 /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
@@ -801,6 +838,18 @@ impl ClassRange {
801838 max ( self . start , other. start ) <= inc_char ( min ( self . end , other. end ) )
802839 }
803840
841+ /// Returns the intersection of the two ranges if they have common
842+ /// characters, `None` otherwise.
843+ fn intersection ( & self , other : & ClassRange ) -> Option < ClassRange > {
844+ let start = max ( self . start , other. start ) ;
845+ let end = min ( self . end , other. end ) ;
846+ if start <= end {
847+ Some ( ClassRange :: new ( start, end) )
848+ } else {
849+ None
850+ }
851+ }
852+
804853 /// Creates a new range representing the union of `self` and `other`.
805854 fn merge ( self , other : ClassRange ) -> ClassRange {
806855 ClassRange {
@@ -1907,6 +1956,108 @@ mod tests {
19071956 ] ) ) ;
19081957 }
19091958
#[test]
fn class_intersection_empty() {
    // Intersecting an empty class with a non-empty one yields an empty class.
    let empty = class(&[]);
    let single = class(&[('a', 'a')]);
    let expected = class(&[]);
    assert_intersection(empty, single, expected);
}
1965+
#[test]
fn class_intersection_single_equal() {
    // Two identical single-character classes intersect to that same class.
    let lhs = class(&[('a', 'a')]);
    let rhs = class(&[('a', 'a')]);
    let expected = class(&[('a', 'a')]);
    assert_intersection(lhs, rhs, expected);
}
1972+
#[test]
fn class_intersection_single_unequal() {
    // Different single characters have nothing in common.
    let only_a = class(&[('a', 'a')]);
    let only_b = class(&[('b', 'b')]);
    let expected = class(&[]);
    assert_intersection(only_a, only_b, expected);
}
1979+
#[test]
fn class_intersection_single_in_other() {
    // A single character lying inside a wider range is kept as is.
    let single = class(&[('a', 'a')]);
    let wider = class(&[('a', 'c')]);
    let expected = class(&[('a', 'a')]);
    assert_intersection(single, wider, expected);
}
1986+
#[test]
fn class_intersection_range_in_other() {
    // A range sharing its start with a longer range is kept whole.
    let shorter = class(&[('a', 'b')]);
    let longer = class(&[('a', 'c')]);
    let expected = class(&[('a', 'b')]);
    assert_intersection(shorter, longer, expected);
}
1993+
#[test]
fn class_intersection_range_intersection() {
    // 'b' is the only character present in both a-b and b-c.
    let lower = class(&[('a', 'b')]);
    let upper = class(&[('b', 'c')]);
    let expected = class(&[('b', 'b')]);
    assert_intersection(lower, upper, expected);
}
2000+
#[test]
fn class_intersection_only_adjacent() {
    // a-b and c-d touch but share no character.
    let lower = class(&[('a', 'b')]);
    let upper = class(&[('c', 'd')]);
    let expected = class(&[]);
    assert_intersection(lower, upper, expected);
}
2007+
#[test]
fn class_intersection_range_subset() {
    // b-c lies strictly inside a-d, so it is returned unchanged.
    let inner = class(&[('b', 'c')]);
    let outer = class(&[('a', 'd')]);
    let expected = class(&[('b', 'c')]);
    assert_intersection(inner, outer, expected);
}
2014+
#[test]
fn class_intersection_many_ranges_in_one_big() {
    // Every small range falls inside a-h, so all of them are kept.
    let pieces = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let covering = class(&[('a', 'h')]);
    let expected = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(pieces, covering, expected);
}
2023+
#[test]
fn class_intersection_many_ranges_same() {
    // Intersecting a multi-range class with an equal class changes nothing.
    let lhs = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let rhs = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let expected = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(lhs, rhs, expected);
}
2032+
#[test]
fn class_intersection_multiple_non_intersecting() {
    // The ranges interleave with gaps between them and never overlap.
    let lhs = class(&[('a', 'b'), ('g', 'h')]);
    let rhs = class(&[('d', 'e'), ('k', 'l')]);
    let expected = class(&[]);
    assert_intersection(lhs, rhs, expected);
}
2039+
#[test]
fn class_intersection_non_intersecting_then_intersecting() {
    // Only the last range of the first class contains 'h'; the earlier
    // ranges have to be skipped before the match is found.
    let several = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let only_h = class(&[('h', 'h')]);
    let expected = class(&[('h', 'h')]);
    assert_intersection(several, only_h, expected);
}
2046+
#[test]
fn class_intersection_adjacent_alternating() {
    // The two classes alternate along a-l, each range touching the next
    // one from the other class without sharing a character.
    let lhs = class(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
    let rhs = class(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
    let expected = class(&[]);
    assert_intersection(lhs, rhs, expected);
}
2053+
#[test]
fn class_intersection_overlapping_alternating() {
    // The shared characters are b, c, d, e and f; the expected class
    // lists them as the single range b-f.
    let lhs = class(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
    let rhs = class(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
    let expected = class(&[('b', 'f')]);
    assert_intersection(lhs, rhs, expected);
}
2060+
19102061 #[ test]
19112062 fn class_canon_overlap_many_case_fold ( ) {
19122063 let cls = class ( & [
@@ -2056,4 +2207,10 @@ mod tests {
20562207 let expr = e ( "(?-u)[-./]" ) ;
20572208 assert_eq ! ( "(?-u:[-\\ .-/])" , expr. to_string( ) ) ;
20582209 }
2210+
2211+ fn assert_intersection ( cls1 : CharClass , cls2 : CharClass , expected : CharClass ) {
2212+ // intersection operation should be commutative
2213+ assert_eq ! ( cls1. intersection( & cls2) , expected) ;
2214+ assert_eq ! ( cls2. intersection( & cls1) , expected) ;
2215+ }
20592216}
0 commit comments