11use super :: * ;
2+ use unicode_width:: UnicodeWidthChar ;
23
34#[ cfg( test) ]
45mod tests;
@@ -8,12 +9,15 @@ mod tests;
89///
910/// This function will use an SSE2 enhanced implementation if hardware support
1011/// is detected at runtime.
11- pub fn analyze_source_file ( src : & str ) -> ( Vec < RelativeBytePos > , Vec < MultiByteChar > ) {
12+ pub fn analyze_source_file (
13+ src : & str ,
14+ ) -> ( Vec < RelativeBytePos > , Vec < MultiByteChar > , Vec < NonNarrowChar > ) {
1215 let mut lines = vec ! [ RelativeBytePos :: from_u32( 0 ) ] ;
1316 let mut multi_byte_chars = vec ! [ ] ;
17+ let mut non_narrow_chars = vec ! [ ] ;
1418
1519 // Calls the right implementation, depending on hardware support available.
16- analyze_source_file_dispatch ( src, & mut lines, & mut multi_byte_chars) ;
20+ analyze_source_file_dispatch ( src, & mut lines, & mut multi_byte_chars, & mut non_narrow_chars ) ;
1721
1822 // The code above optimistically registers a new line *after* each \n
1923 // it encounters. If that point is already outside the source_file, remove
@@ -26,7 +30,7 @@ pub fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteCha
2630 }
2731 }
2832
29- ( lines, multi_byte_chars)
33+ ( lines, multi_byte_chars, non_narrow_chars )
3034}
3135
3236cfg_match ! {
@@ -35,10 +39,11 @@ cfg_match! {
3539 src: & str ,
3640 lines: & mut Vec <RelativeBytePos >,
3741 multi_byte_chars: & mut Vec <MultiByteChar >,
42+ non_narrow_chars: & mut Vec <NonNarrowChar >,
3843 ) {
3944 if is_x86_feature_detected!( "sse2" ) {
4045 unsafe {
41- analyze_source_file_sse2( src, lines, multi_byte_chars) ;
46+ analyze_source_file_sse2( src, lines, multi_byte_chars, non_narrow_chars ) ;
4247 }
4348 } else {
4449 analyze_source_file_generic(
@@ -47,6 +52,7 @@ cfg_match! {
4752 RelativeBytePos :: from_u32( 0 ) ,
4853 lines,
4954 multi_byte_chars,
55+ non_narrow_chars,
5056 ) ;
5157 }
5258 }
@@ -60,6 +66,7 @@ cfg_match! {
6066 src: & str ,
6167 lines: & mut Vec <RelativeBytePos >,
6268 multi_byte_chars: & mut Vec <MultiByteChar >,
69+ non_narrow_chars: & mut Vec <NonNarrowChar >,
6370 ) {
6471 #[ cfg( target_arch = "x86" ) ]
6572 use std:: arch:: x86:: * ;
@@ -152,6 +159,7 @@ cfg_match! {
152159 RelativeBytePos :: from_usize( scan_start) ,
153160 lines,
154161 multi_byte_chars,
162+ non_narrow_chars,
155163 ) ;
156164 }
157165
@@ -164,6 +172,7 @@ cfg_match! {
164172 RelativeBytePos :: from_usize( tail_start) ,
165173 lines,
166174 multi_byte_chars,
175+ non_narrow_chars,
167176 ) ;
168177 }
169178 }
@@ -174,13 +183,15 @@ cfg_match! {
174183 src: & str ,
175184 lines: & mut Vec <RelativeBytePos >,
176185 multi_byte_chars: & mut Vec <MultiByteChar >,
186+ non_narrow_chars: & mut Vec <NonNarrowChar >,
177187 ) {
178188 analyze_source_file_generic(
179189 src,
180190 src. len( ) ,
181191 RelativeBytePos :: from_u32( 0 ) ,
182192 lines,
183193 multi_byte_chars,
194+ non_narrow_chars,
184195 ) ;
185196 }
186197 }
@@ -194,6 +205,7 @@ fn analyze_source_file_generic(
194205 output_offset : RelativeBytePos ,
195206 lines : & mut Vec < RelativeBytePos > ,
196207 multi_byte_chars : & mut Vec < MultiByteChar > ,
208+ non_narrow_chars : & mut Vec < NonNarrowChar > ,
197209) -> usize {
198210 assert ! ( src. len( ) >= scan_len) ;
199211 let mut i = 0 ;
@@ -215,8 +227,16 @@ fn analyze_source_file_generic(
215227
216228 let pos = RelativeBytePos :: from_usize ( i) + output_offset;
217229
218- if let b'\n' = byte {
219- lines. push ( pos + RelativeBytePos ( 1 ) ) ;
230+ match byte {
231+ b'\n' => {
232+ lines. push ( pos + RelativeBytePos ( 1 ) ) ;
233+ }
234+ b'\t' => {
235+ non_narrow_chars. push ( NonNarrowChar :: Tab ( pos) ) ;
236+ }
237+ _ => {
238+ non_narrow_chars. push ( NonNarrowChar :: ZeroWidth ( pos) ) ;
239+ }
220240 }
221241 } else if byte >= 127 {
222242 // The slow path:
@@ -232,6 +252,14 @@ fn analyze_source_file_generic(
232252 let mbc = MultiByteChar { pos, bytes : char_len as u8 } ;
233253 multi_byte_chars. push ( mbc) ;
234254 }
255+
256+ // Assume control characters are zero width.
257+ // FIXME: How can we decide between `width` and `width_cjk`?
258+ let char_width = UnicodeWidthChar :: width ( c) . unwrap_or ( 0 ) ;
259+
260+ if char_width != 1 {
261+ non_narrow_chars. push ( NonNarrowChar :: new ( pos, char_width) ) ;
262+ }
235263 }
236264
237265 i += char_len;
0 commit comments