@@ -30,9 +30,38 @@ use crate::unicode;
30
30
//
31
31
// Tests on this are relegated to the public API of HIR in src/hir.rs.
32
32
33
- #[ derive( Clone , Debug , Eq , PartialEq ) ]
33
+ #[ derive( Clone , Debug ) ]
34
34
pub struct IntervalSet < I > {
35
+ /// A sorted set of non-overlapping ranges.
35
36
ranges : Vec < I > ,
37
+ /// While not required at all for correctness, we keep track of whether an
38
+ /// interval set has been case folded or not. This helps us avoid doing
39
+ /// redundant work if, for example, a set has already been case folded.
40
+ /// And note that whether a set is folded or not is preserved through
41
+ /// all of the pairwise set operations. That is, if both interval sets
42
+ /// have been case folded, then any of difference, union, intersection or
43
+ /// symmetric difference all produce a case folded set.
44
+ ///
45
+ /// Note that when this is true, it *must* be the case that the set is case
46
+ /// folded. But when it's false, the set *may* be case folded. In other
47
+ /// words, we only set this to true when we know it to be the case, but we're
48
+ /// okay with it being false if it would otherwise be costly to determine
49
+ /// whether it should be true. This means code cannot assume that a false
50
+ /// value necessarily indicates that the set is not case folded.
51
+ ///
52
+ /// Bottom line: this is a performance optimization.
53
+ folded : bool ,
54
+ }
55
+
56
+ impl < I : Interval > Eq for IntervalSet < I > { }
57
+
58
+ // We implement PartialEq manually so that we don't consider the set's internal
59
+ // 'folded' property to be part of its identity. The 'folded' property is
60
+ // strictly an optimization.
61
+ impl < I : Interval > PartialEq for IntervalSet < I > {
62
+ fn eq ( & self , other : & IntervalSet < I > ) -> bool {
63
+ self . ranges . eq ( & other. ranges )
64
+ }
36
65
}
37
66
38
67
impl < I : Interval > IntervalSet < I > {
@@ -42,7 +71,10 @@ impl<I: Interval> IntervalSet<I> {
42
71
/// The given ranges do not need to be in any specific order, and ranges
43
72
/// may overlap.
44
73
pub fn new < T : IntoIterator < Item = I > > ( intervals : T ) -> IntervalSet < I > {
45
- let mut set = IntervalSet { ranges : intervals. into_iter ( ) . collect ( ) } ;
74
+ let ranges: Vec < I > = intervals. into_iter ( ) . collect ( ) ;
75
+ // An empty set is case folded.
76
+ let folded = ranges. is_empty ( ) ;
77
+ let mut set = IntervalSet { ranges, folded } ;
46
78
set. canonicalize ( ) ;
47
79
set
48
80
}
@@ -53,6 +85,10 @@ impl<I: Interval> IntervalSet<I> {
53
85
// it preserves canonicalization.
54
86
self . ranges . push ( interval) ;
55
87
self . canonicalize ( ) ;
88
+ // We don't know whether the new interval added here is considered
89
+ // case folded, so we conservatively assume that the entire set is
90
+ // no longer case folded if it was previously.
91
+ self . folded = false ;
56
92
}
57
93
58
94
/// Return an iterator over all intervals in this set.
@@ -77,6 +113,9 @@ impl<I: Interval> IntervalSet<I> {
77
113
/// This returns an error if the necessary case mapping data is not
78
114
/// available.
79
115
pub fn case_fold_simple ( & mut self ) -> Result < ( ) , unicode:: CaseFoldError > {
116
+ if self . folded {
117
+ return Ok ( ( ) ) ;
118
+ }
80
119
let len = self . ranges . len ( ) ;
81
120
for i in 0 ..len {
82
121
let range = self . ranges [ i] ;
@@ -86,14 +125,19 @@ impl<I: Interval> IntervalSet<I> {
86
125
}
87
126
}
88
127
self . canonicalize ( ) ;
128
+ self . folded = true ;
89
129
Ok ( ( ) )
90
130
}
91
131
92
132
/// Union this set with the given set, in place.
93
133
pub fn union ( & mut self , other : & IntervalSet < I > ) {
134
+ if other. ranges . is_empty ( ) {
135
+ return ;
136
+ }
94
137
// This could almost certainly be done more efficiently.
95
138
self . ranges . extend ( & other. ranges ) ;
96
139
self . canonicalize ( ) ;
140
+ self . folded = self . folded && other. folded ;
97
141
}
98
142
99
143
/// Intersect this set with the given set, in place.
@@ -103,6 +147,8 @@ impl<I: Interval> IntervalSet<I> {
103
147
}
104
148
if other. ranges . is_empty ( ) {
105
149
self . ranges . clear ( ) ;
150
+ // An empty set is case folded.
151
+ self . folded = true ;
106
152
return ;
107
153
}
108
154
@@ -132,6 +178,7 @@ impl<I: Interval> IntervalSet<I> {
132
178
}
133
179
}
134
180
self . ranges . drain ( ..drain_end) ;
181
+ self . folded = self . folded && other. folded ;
135
182
}
136
183
137
184
/// Subtract the given set from this set, in place.
@@ -224,6 +271,7 @@ impl<I: Interval> IntervalSet<I> {
224
271
a += 1 ;
225
272
}
226
273
self . ranges . drain ( ..drain_end) ;
274
+ self . folded = self . folded && other. folded ;
227
275
}
228
276
229
277
/// Compute the symmetric difference of the two sets, in place.
@@ -249,6 +297,8 @@ impl<I: Interval> IntervalSet<I> {
249
297
if self . ranges . is_empty ( ) {
250
298
let ( min, max) = ( I :: Bound :: min_value ( ) , I :: Bound :: max_value ( ) ) ;
251
299
self . ranges . push ( I :: create ( min, max) ) ;
300
+ // The set containing everything must be case folded.
301
+ self . folded = true ;
252
302
return ;
253
303
}
254
304
@@ -274,6 +324,19 @@ impl<I: Interval> IntervalSet<I> {
274
324
self . ranges . push ( I :: create ( lower, I :: Bound :: max_value ( ) ) ) ;
275
325
}
276
326
self . ranges . drain ( ..drain_end) ;
327
+ // We don't need to update whether this set is folded or not, because
328
+ // it is conservatively preserved through negation. Namely, if a set
329
+ // is not folded, then it is possible that its negation is folded, for
330
+ // example, [^☃]. But we're fine with assuming that the set is not
331
+ // folded in that case. (`folded` permits false negatives but not false
332
+ // positives.)
333
+ //
334
+ // But what about when a set is folded, is its negation also
335
+ // necessarily folded? Yes. Because if a set is folded, then for every
336
+ // character in the set, it necessarily includes its equivalence class
337
+ // of case folded characters. Negating it in turn means that all
338
+ // equivalence classes in the set are negated, and any equivalence
339
+ // class that was previously not in the set is now entirely in the set.
277
340
}
278
341
279
342
/// Converts this set into a canonical ordering.
0 commit comments