10
10
11
11
use ast;
12
12
use parse:: { ParseSess , PResult , filemap_to_tts} ;
13
- use parse:: new_parser_from_source_str;
13
+ use parse:: { lexer , new_parser_from_source_str} ;
14
14
use parse:: parser:: Parser ;
15
15
use parse:: token;
16
16
use ptr:: P ;
17
- use str :: char_at ;
17
+ use std :: iter :: Peekable ;
18
18
19
19
/// Map a string to tts, using a made-up filename:
20
20
pub fn string_to_tts ( source_str : String ) -> Vec < ast:: TokenTree > {
@@ -87,69 +87,62 @@ pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
87
87
88
88
/// Does the given string match the pattern? Whitespace in the first string
89
89
/// may be deleted or replaced with other whitespace to match the pattern.
90
- /// this function is Unicode-ignorant; fortunately, the careful design of
91
- /// UTF-8 mitigates this ignorance. In particular, this function only collapses
92
- /// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate Unicode
93
- /// chars. Unsurprisingly, it doesn't do NKF-normalization(?).
90
+ /// This function is relatively Unicode-ignorant; fortunately, the careful design
91
+ /// of UTF-8 mitigates this ignorance. It doesn't do Unicode normalization (e.g. NFKC).
94
92
pub fn matches_codepattern ( a : & str , b : & str ) -> bool {
95
- let mut idx_a = 0 ;
96
- let mut idx_b = 0 ;
93
+ let mut a_iter = a. chars ( ) . peekable ( ) ;
94
+ let mut b_iter = b. chars ( ) . peekable ( ) ;
95
+
97
96
loop {
98
- if idx_a == a. len ( ) && idx_b == b. len ( ) {
99
- return true ;
100
- }
101
- else if idx_a == a. len ( ) { return false ; }
102
- else if idx_b == b. len ( ) {
103
- // maybe the stuff left in a is all ws?
104
- if is_whitespace ( char_at ( a, idx_a) ) {
105
- return scan_for_non_ws_or_end ( a, idx_a) == a. len ( ) ;
106
- } else {
107
- return false ;
97
+ let ( a, b) = match ( a_iter. peek ( ) , b_iter. peek ( ) ) {
98
+ ( None , None ) => return true ,
99
+ ( None , _) => return false ,
100
+ ( Some ( & a) , None ) => {
101
+ if is_pattern_whitespace ( a) {
102
+ break // trailing whitespace check is out of loop for borrowck
103
+ } else {
104
+ return false
105
+ }
108
106
}
109
- }
110
- // ws in both given and pattern:
111
- else if is_whitespace ( char_at ( a, idx_a) )
112
- && is_whitespace ( char_at ( b, idx_b) ) {
113
- idx_a = scan_for_non_ws_or_end ( a, idx_a) ;
114
- idx_b = scan_for_non_ws_or_end ( b, idx_b) ;
115
- }
116
- // ws in given only:
117
- else if is_whitespace ( char_at ( a, idx_a) ) {
118
- idx_a = scan_for_non_ws_or_end ( a, idx_a) ;
119
- }
120
- // *don't* silently eat ws in expected only.
121
- else if char_at ( a, idx_a) == char_at ( b, idx_b) {
122
- idx_a += 1 ;
123
- idx_b += 1 ;
124
- }
125
- else {
126
- return false ;
107
+ ( Some ( & a) , Some ( & b) ) => ( a, b)
108
+ } ;
109
+
110
+ if is_pattern_whitespace ( a) && is_pattern_whitespace ( b) {
111
+ // skip whitespace for a and b
112
+ scan_for_non_ws_or_end ( & mut a_iter) ;
113
+ scan_for_non_ws_or_end ( & mut b_iter) ;
114
+ } else if is_pattern_whitespace ( a) {
115
+ // skip whitespace for a
116
+ scan_for_non_ws_or_end ( & mut a_iter) ;
117
+ } else if a == b {
118
+ a_iter. next ( ) ;
119
+ b_iter. next ( ) ;
120
+ } else {
121
+ return false
127
122
}
128
123
}
124
+
125
+ // check if a has *only* trailing whitespace
126
+ a_iter. all ( is_pattern_whitespace)
129
127
}
130
128
131
- /// Given a string and an index, return the first usize >= idx
132
- /// that is a non-ws-char or is outside of the legal range of
133
- /// the string.
134
- fn scan_for_non_ws_or_end ( a : & str , idx : usize ) -> usize {
135
- let mut i = idx;
136
- let len = a. len ( ) ;
137
- while ( i < len) && ( is_whitespace ( char_at ( a, i) ) ) {
138
- i += 1 ;
129
+ /// Advances the given peekable `Iterator` past pattern whitespace until it reaches a non-whitespace character (or, as the name says, the end)
130
+ fn scan_for_non_ws_or_end < I : Iterator < Item = char > > ( iter : & mut Peekable < I > ) {
131
+ while lexer:: is_pattern_whitespace ( iter. peek ( ) . cloned ( ) ) {
132
+ iter. next ( ) ;
139
133
}
140
- i
141
134
}
142
135
143
- /// Copied from lexer.
144
- pub fn is_whitespace ( c : char ) -> bool {
145
- return c == ' ' || c == '\t' || c == '\r' || c == '\n' ;
136
+ pub fn is_pattern_whitespace ( c : char ) -> bool {
137
+ lexer:: is_pattern_whitespace ( Some ( c) )
146
138
}
147
139
148
140
#[ cfg( test) ]
149
141
mod tests {
150
142
use super :: * ;
151
143
152
- #[ test] fn eqmodws ( ) {
144
+ #[ test]
145
+ fn eqmodws ( ) {
153
146
assert_eq ! ( matches_codepattern( "" , "" ) , true ) ;
154
147
assert_eq ! ( matches_codepattern( "" , "a" ) , false ) ;
155
148
assert_eq ! ( matches_codepattern( "a" , "" ) , false ) ;
@@ -160,5 +153,22 @@ mod tests {
160
153
assert_eq ! ( matches_codepattern( "a b" , "a b" ) , true ) ;
161
154
assert_eq ! ( matches_codepattern( "ab" , "a b" ) , false ) ;
162
155
assert_eq ! ( matches_codepattern( "a b" , "ab" ) , true ) ;
156
+ assert_eq ! ( matches_codepattern( " a b" , "ab" ) , true ) ;
157
+ }
158
+
159
+ #[ test]
160
+ fn pattern_whitespace ( ) {
161
+ assert_eq ! ( matches_codepattern( "" , "\x0C " ) , false ) ;
162
+ assert_eq ! ( matches_codepattern( "a b " , "a \u{0085} \n \t \r b" ) , true ) ;
163
+ assert_eq ! ( matches_codepattern( "a b" , "a \u{0085} \n \t \r b " ) , false ) ;
164
+ }
165
+
166
+ #[ test]
167
+ fn non_pattern_whitespace ( ) {
168
+ // These have the property 'White_Space' but not 'Pattern_White_Space'
169
+ assert_eq ! ( matches_codepattern( "a b" , "a\u{2002} b" ) , false ) ;
170
+ assert_eq ! ( matches_codepattern( "a b" , "a\u{2002} b" ) , false ) ;
171
+ assert_eq ! ( matches_codepattern( "\u{205F} a b" , "ab" ) , false ) ;
172
+ assert_eq ! ( matches_codepattern( "a \u{3000} b" , "ab" ) , false ) ;
163
173
}
164
174
}
0 commit comments