1
1
use super :: * ;
2
+ use unicode_width:: UnicodeWidthChar ;
2
3
3
4
#[ cfg( test) ]
4
5
mod tests;
@@ -8,12 +9,15 @@ mod tests;
8
9
///
9
10
/// This function will use an SSE2 enhanced implementation if hardware support
10
11
/// is detected at runtime.
11
- pub fn analyze_source_file ( src : & str ) -> ( Vec < RelativeBytePos > , Vec < MultiByteChar > ) {
12
+ pub fn analyze_source_file (
13
+ src : & str ,
14
+ ) -> ( Vec < RelativeBytePos > , Vec < MultiByteChar > , Vec < NonNarrowChar > ) {
12
15
let mut lines = vec ! [ RelativeBytePos :: from_u32( 0 ) ] ;
13
16
let mut multi_byte_chars = vec ! [ ] ;
17
+ let mut non_narrow_chars = vec ! [ ] ;
14
18
15
19
// Calls the right implementation, depending on hardware support available.
16
- analyze_source_file_dispatch ( src, & mut lines, & mut multi_byte_chars) ;
20
+ analyze_source_file_dispatch ( src, & mut lines, & mut multi_byte_chars, & mut non_narrow_chars ) ;
17
21
18
22
// The code above optimistically registers a new line *after* each \n
19
23
// it encounters. If that point is already outside the source_file, remove
@@ -26,7 +30,7 @@ pub fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteCha
26
30
}
27
31
}
28
32
29
- ( lines, multi_byte_chars)
33
+ ( lines, multi_byte_chars, non_narrow_chars )
30
34
}
31
35
32
36
cfg_match ! {
@@ -35,10 +39,11 @@ cfg_match! {
35
39
src: & str ,
36
40
lines: & mut Vec <RelativeBytePos >,
37
41
multi_byte_chars: & mut Vec <MultiByteChar >,
42
+ non_narrow_chars: & mut Vec <NonNarrowChar >,
38
43
) {
39
44
if is_x86_feature_detected!( "sse2" ) {
40
45
unsafe {
41
- analyze_source_file_sse2( src, lines, multi_byte_chars) ;
46
+ analyze_source_file_sse2( src, lines, multi_byte_chars, non_narrow_chars ) ;
42
47
}
43
48
} else {
44
49
analyze_source_file_generic(
@@ -47,6 +52,7 @@ cfg_match! {
47
52
RelativeBytePos :: from_u32( 0 ) ,
48
53
lines,
49
54
multi_byte_chars,
55
+ non_narrow_chars,
50
56
) ;
51
57
}
52
58
}
@@ -60,6 +66,7 @@ cfg_match! {
60
66
src: & str ,
61
67
lines: & mut Vec <RelativeBytePos >,
62
68
multi_byte_chars: & mut Vec <MultiByteChar >,
69
+ non_narrow_chars: & mut Vec <NonNarrowChar >,
63
70
) {
64
71
#[ cfg( target_arch = "x86" ) ]
65
72
use std:: arch:: x86:: * ;
@@ -152,6 +159,7 @@ cfg_match! {
152
159
RelativeBytePos :: from_usize( scan_start) ,
153
160
lines,
154
161
multi_byte_chars,
162
+ non_narrow_chars,
155
163
) ;
156
164
}
157
165
@@ -164,6 +172,7 @@ cfg_match! {
164
172
RelativeBytePos :: from_usize( tail_start) ,
165
173
lines,
166
174
multi_byte_chars,
175
+ non_narrow_chars,
167
176
) ;
168
177
}
169
178
}
@@ -174,13 +183,15 @@ cfg_match! {
174
183
src: & str ,
175
184
lines: & mut Vec <RelativeBytePos >,
176
185
multi_byte_chars: & mut Vec <MultiByteChar >,
186
+ non_narrow_chars: & mut Vec <NonNarrowChar >,
177
187
) {
178
188
analyze_source_file_generic(
179
189
src,
180
190
src. len( ) ,
181
191
RelativeBytePos :: from_u32( 0 ) ,
182
192
lines,
183
193
multi_byte_chars,
194
+ non_narrow_chars,
184
195
) ;
185
196
}
186
197
}
@@ -194,6 +205,7 @@ fn analyze_source_file_generic(
194
205
output_offset : RelativeBytePos ,
195
206
lines : & mut Vec < RelativeBytePos > ,
196
207
multi_byte_chars : & mut Vec < MultiByteChar > ,
208
+ non_narrow_chars : & mut Vec < NonNarrowChar > ,
197
209
) -> usize {
198
210
assert ! ( src. len( ) >= scan_len) ;
199
211
let mut i = 0 ;
@@ -215,8 +227,16 @@ fn analyze_source_file_generic(
215
227
216
228
let pos = RelativeBytePos :: from_usize ( i) + output_offset;
217
229
218
- if let b'\n' = byte {
219
- lines. push ( pos + RelativeBytePos ( 1 ) ) ;
230
+ match byte {
231
+ b'\n' => {
232
+ lines. push ( pos + RelativeBytePos ( 1 ) ) ;
233
+ }
234
+ b'\t' => {
235
+ non_narrow_chars. push ( NonNarrowChar :: Tab ( pos) ) ;
236
+ }
237
+ _ => {
238
+ non_narrow_chars. push ( NonNarrowChar :: ZeroWidth ( pos) ) ;
239
+ }
220
240
}
221
241
} else if byte >= 127 {
222
242
// The slow path:
@@ -232,6 +252,14 @@ fn analyze_source_file_generic(
232
252
let mbc = MultiByteChar { pos, bytes : char_len as u8 } ;
233
253
multi_byte_chars. push ( mbc) ;
234
254
}
255
+
256
+ // Assume control characters are zero width.
257
+ // FIXME: How can we decide between `width` and `width_cjk`?
258
+ let char_width = UnicodeWidthChar :: width ( c) . unwrap_or ( 0 ) ;
259
+
260
+ if char_width != 1 {
261
+ non_narrow_chars. push ( NonNarrowChar :: new ( pos, char_width) ) ;
262
+ }
235
263
}
236
264
237
265
i += char_len;
0 commit comments