1111// Format string literals.
1212
1313use regex:: Regex ;
14+ use unicode_categories:: UnicodeCategories ;
1415use unicode_segmentation:: UnicodeSegmentation ;
1516
1617use config:: Config ;
1718use shape:: Shape ;
18- use utils:: wrap_str;
19+ use utils:: { unicode_str_width , wrap_str} ;
1920
2021const MIN_STRING : usize = 10 ;
2122
@@ -53,7 +54,7 @@ impl<'a> StringFormat<'a> {
5354 /// indentation into account.
5455 ///
5556 /// If we cannot put at least a single character per line, the rewrite won't succeed.
56- fn max_chars_with_indent ( & self ) -> Option < usize > {
57+ fn max_width_with_indent ( & self ) -> Option < usize > {
5758 Some (
5859 self . shape
5960 . width
@@ -62,10 +63,10 @@ impl<'a> StringFormat<'a> {
6263 )
6364 }
6465
65- /// Like max_chars_with_indent but the indentation is not subtracted.
66+ /// Like max_width_with_indent but the indentation is not subtracted.
6667 /// This allows to fit more graphemes from the string on a line when
6768 /// SnippetState::EndWithLineFeed.
68- fn max_chars_without_indent ( & self ) -> Option < usize > {
69+ fn max_width_without_indent ( & self ) -> Option < usize > {
6970 Some ( self . config . max_width ( ) . checked_sub ( self . line_end . len ( ) ) ?)
7071 }
7172}
@@ -75,8 +76,8 @@ pub fn rewrite_string<'a>(
7576 fmt : & StringFormat < ' a > ,
7677 newline_max_chars : usize ,
7778) -> Option < String > {
78- let max_chars_with_indent = fmt. max_chars_with_indent ( ) ?;
79- let max_chars_without_indent = fmt. max_chars_without_indent ( ) ?;
79+ let max_width_with_indent = fmt. max_width_with_indent ( ) ?;
80+ let max_width_without_indent = fmt. max_width_without_indent ( ) ?;
8081 let indent_with_newline = fmt. shape . indent . to_string_with_newline ( fmt. config ) ;
8182 let indent_without_newline = fmt. shape . indent . to_string ( fmt. config ) ;
8283
@@ -99,11 +100,11 @@ pub fn rewrite_string<'a>(
99100
100101 // Snip a line at a time from `stripped_str` until it is used up. Push the snippet
101102 // onto result.
102- let mut cur_max_chars = max_chars_with_indent ;
103+ let mut cur_max_width = max_width_with_indent ;
103104 let is_bareline_ok = fmt. line_start . is_empty ( ) || is_whitespace ( fmt. line_start ) ;
104105 loop {
105106 // All the input starting at cur_start fits on the current line
106- if graphemes. len ( ) - cur_start <= cur_max_chars {
107+ if graphemes_width ( & graphemes[ cur_start.. ] ) <= cur_max_width {
107108 for ( i, grapheme) in graphemes[ cur_start..] . iter ( ) . enumerate ( ) {
108109 if is_new_line ( grapheme) {
109110 // take care of blank lines
@@ -123,7 +124,7 @@ pub fn rewrite_string<'a>(
123124
124125 // The input starting at cur_start needs to be broken
125126 match break_string (
126- cur_max_chars ,
127+ cur_max_width ,
127128 fmt. trim_end ,
128129 fmt. line_end ,
129130 & graphemes[ cur_start..] ,
@@ -133,7 +134,7 @@ pub fn rewrite_string<'a>(
133134 result. push_str ( fmt. line_end ) ;
134135 result. push_str ( & indent_with_newline) ;
135136 result. push_str ( fmt. line_start ) ;
136- cur_max_chars = newline_max_chars;
137+ cur_max_width = newline_max_chars;
137138 cur_start += len;
138139 }
139140 SnippetState :: EndWithLineFeed ( line, len) => {
@@ -143,11 +144,11 @@ pub fn rewrite_string<'a>(
143144 result. push_str ( & line) ;
144145 if is_bareline_ok {
145146 // the next line can benefit from the full width
146- cur_max_chars = max_chars_without_indent ;
147+ cur_max_width = max_width_without_indent ;
147148 } else {
148149 result. push_str ( & indent_without_newline) ;
149150 result. push_str ( fmt. line_start ) ;
150- cur_max_chars = max_chars_with_indent ;
151+ cur_max_width = max_width_with_indent ;
151152 }
152153 cur_start += len;
153154 }
@@ -226,9 +227,10 @@ fn not_whitespace_except_line_feed(g: &str) -> bool {
226227 is_new_line ( g) || !is_whitespace ( g)
227228}
228229
229- /// Break the input string at a boundary character around the offset `max_chars `. A boundary
230+ /// Break the input string at a boundary character around the offset `max_width `. A boundary
230231/// character is either a punctuation or a whitespace.
231- fn break_string ( max_chars : usize , trim_end : bool , line_end : & str , input : & [ & str ] ) -> SnippetState {
232+ /// FIXME(issue#3281): We must follow UAX#14 algorithm instead of this.
233+ fn break_string ( max_width : usize , trim_end : bool , line_end : & str , input : & [ & str ] ) -> SnippetState {
232234 let break_at = |index /* grapheme at index is included */ | {
233235 // Take in any whitespaces to the left/right of `input[index]` while
234236 // preserving line feeds
@@ -272,19 +274,33 @@ fn break_string(max_chars: usize, trim_end: bool, line_end: &str, input: &[&str]
272274 }
273275 } ;
274276
277+ // Find the first index `x` where the unicode width of `input[0..=x]` becomes > `max_width`.
278+ let max_width_index_in_input = {
279+ let mut cur_width = 0 ;
280+ let mut cur_index = 0 ;
281+ for ( i, grapheme) in input. iter ( ) . enumerate ( ) {
282+ cur_width += unicode_str_width ( grapheme) ;
283+ cur_index = i;
284+ if cur_width > max_width {
285+ break ;
286+ }
287+ }
288+ cur_index
289+ } ;
290+
275291 // Find the position in input for breaking the string
276292 if line_end. is_empty ( )
277293 && trim_end
278- && !is_whitespace ( input[ max_chars - 1 ] )
279- && is_whitespace ( input[ max_chars ] )
294+ && !is_whitespace ( input[ max_width_index_in_input - 1 ] )
295+ && is_whitespace ( input[ max_width_index_in_input ] )
280296 {
281297 // At a breaking point already
282298 // The line won't invalidate the rewriting because:
283299 // - no extra space needed for the line_end character
284300 // - extra whitespaces to the right can be trimmed
285- return break_at ( max_chars - 1 ) ;
301+ return break_at ( max_width_index_in_input - 1 ) ;
286302 }
287- if let Some ( url_index_end) = detect_url ( input, max_chars ) {
303+ if let Some ( url_index_end) = detect_url ( input, max_width_index_in_input ) {
288304 let index_plus_ws = url_index_end
289305 + input[ url_index_end..]
290306 . iter ( )
@@ -297,27 +313,28 @@ fn break_string(max_chars: usize, trim_end: bool, line_end: &str, input: &[&str]
297313 return SnippetState :: LineEnd ( input[ ..=index_plus_ws] . concat ( ) , index_plus_ws + 1 ) ;
298314 } ;
299315 }
300- match input[ 0 ..max_chars]
316+
317+ match input[ 0 ..max_width_index_in_input]
301318 . iter ( )
302319 . rposition ( |grapheme| is_whitespace ( grapheme) )
303320 {
304321 // Found a whitespace and what is on its left side is big enough.
305322 Some ( index) if index >= MIN_STRING => break_at ( index) ,
306323 // No whitespace found, try looking for a punctuation instead
307- _ => match input[ 0 ..max_chars ]
324+ _ => match input[ 0 ..max_width_index_in_input ]
308325 . iter ( )
309326 . rposition ( |grapheme| is_punctuation ( grapheme) )
310327 {
311328 // Found a punctuation and what is on its left side is big enough.
312329 Some ( index) if index >= MIN_STRING => break_at ( index) ,
313330 // Either no boundary character was found to the left of `input[max_width_index_in_input]`, or the line
314331 // got too small. We try searching for a boundary character to the right.
315- _ => match input[ max_chars ..]
332+ _ => match input[ max_width_index_in_input ..]
316333 . iter ( )
317334 . position ( |grapheme| is_whitespace ( grapheme) || is_punctuation ( grapheme) )
318335 {
319336 // A boundary was found after the line limit
320- Some ( index) => break_at ( max_chars + index) ,
337+ Some ( index) => break_at ( max_width_index_in_input + index) ,
321338 // No boundary to the right, the input cannot be broken
322339 None => SnippetState :: EndOfInput ( input. concat ( ) ) ,
323340 } ,
@@ -335,10 +352,11 @@ fn is_whitespace(grapheme: &str) -> bool {
335352}
336353
337354fn is_punctuation ( grapheme : & str ) -> bool {
338- match grapheme. as_bytes ( ) [ 0 ] {
339- b':' | b',' | b';' | b'.' => true ,
340- _ => false ,
341- }
355+ grapheme. chars ( ) . all ( |c| c. is_punctuation_other ( ) )
356+ }
357+
358+ fn graphemes_width ( graphemes : & [ & str ] ) -> usize {
359+ graphemes. iter ( ) . map ( |s| unicode_str_width ( s) ) . sum ( )
342360}
343361
344362#[ cfg( test) ]
0 commit comments