11
11
// Format string literals.
12
12
13
13
use regex:: Regex ;
14
+ use unicode_categories:: UnicodeCategories ;
14
15
use unicode_segmentation:: UnicodeSegmentation ;
15
16
16
17
use config:: Config ;
17
18
use shape:: Shape ;
18
- use utils:: wrap_str;
19
+ use utils:: { unicode_str_width , wrap_str} ;
19
20
20
21
const MIN_STRING : usize = 10 ;
21
22
@@ -53,7 +54,7 @@ impl<'a> StringFormat<'a> {
53
54
/// indentation into account.
54
55
///
55
56
/// If we cannot put at least a single character per line, the rewrite won't succeed.
56
- fn max_chars_with_indent ( & self ) -> Option < usize > {
57
+ fn max_width_with_indent ( & self ) -> Option < usize > {
57
58
Some (
58
59
self . shape
59
60
. width
@@ -62,10 +63,10 @@ impl<'a> StringFormat<'a> {
62
63
)
63
64
}
64
65
65
- /// Like max_chars_with_indent but the indentation is not subtracted.
66
+ /// Like max_width_with_indent but the indentation is not subtracted.
66
67
/// This allows fitting more graphemes from the string on a line when
67
68
/// SnippetState::EndWithLineFeed.
68
- fn max_chars_without_indent ( & self ) -> Option < usize > {
69
+ fn max_width_without_indent ( & self ) -> Option < usize > {
69
70
Some ( self . config . max_width ( ) . checked_sub ( self . line_end . len ( ) ) ?)
70
71
}
71
72
}
@@ -75,8 +76,8 @@ pub fn rewrite_string<'a>(
75
76
fmt : & StringFormat < ' a > ,
76
77
newline_max_chars : usize ,
77
78
) -> Option < String > {
78
- let max_chars_with_indent = fmt. max_chars_with_indent ( ) ?;
79
- let max_chars_without_indent = fmt. max_chars_without_indent ( ) ?;
79
+ let max_width_with_indent = fmt. max_width_with_indent ( ) ?;
80
+ let max_width_without_indent = fmt. max_width_without_indent ( ) ?;
80
81
let indent_with_newline = fmt. shape . indent . to_string_with_newline ( fmt. config ) ;
81
82
let indent_without_newline = fmt. shape . indent . to_string ( fmt. config ) ;
82
83
@@ -99,11 +100,11 @@ pub fn rewrite_string<'a>(
99
100
100
101
// Snip a line at a time from `stripped_str` until it is used up. Push the snippet
101
102
// onto result.
102
- let mut cur_max_chars = max_chars_with_indent ;
103
+ let mut cur_max_width = max_width_with_indent ;
103
104
let is_bareline_ok = fmt. line_start . is_empty ( ) || is_whitespace ( fmt. line_start ) ;
104
105
loop {
105
106
// All the input starting at cur_start fits on the current line
106
- if graphemes. len ( ) - cur_start <= cur_max_chars {
107
+ if graphemes_width ( & graphemes[ cur_start.. ] ) <= cur_max_width {
107
108
for ( i, grapheme) in graphemes[ cur_start..] . iter ( ) . enumerate ( ) {
108
109
if is_new_line ( grapheme) {
109
110
// take care of blank lines
@@ -123,7 +124,7 @@ pub fn rewrite_string<'a>(
123
124
124
125
// The input starting at cur_start needs to be broken
125
126
match break_string (
126
- cur_max_chars ,
127
+ cur_max_width ,
127
128
fmt. trim_end ,
128
129
fmt. line_end ,
129
130
& graphemes[ cur_start..] ,
@@ -133,7 +134,7 @@ pub fn rewrite_string<'a>(
133
134
result. push_str ( fmt. line_end ) ;
134
135
result. push_str ( & indent_with_newline) ;
135
136
result. push_str ( fmt. line_start ) ;
136
- cur_max_chars = newline_max_chars;
137
+ cur_max_width = newline_max_chars;
137
138
cur_start += len;
138
139
}
139
140
SnippetState :: EndWithLineFeed ( line, len) => {
@@ -143,11 +144,11 @@ pub fn rewrite_string<'a>(
143
144
result. push_str ( & line) ;
144
145
if is_bareline_ok {
145
146
// the next line can benefit from the full width
146
- cur_max_chars = max_chars_without_indent ;
147
+ cur_max_width = max_width_without_indent ;
147
148
} else {
148
149
result. push_str ( & indent_without_newline) ;
149
150
result. push_str ( fmt. line_start ) ;
150
- cur_max_chars = max_chars_with_indent ;
151
+ cur_max_width = max_width_with_indent ;
151
152
}
152
153
cur_start += len;
153
154
}
@@ -226,9 +227,10 @@ fn not_whitespace_except_line_feed(g: &str) -> bool {
226
227
is_new_line ( g) || !is_whitespace ( g)
227
228
}
228
229
229
- /// Break the input string at a boundary character around the offset `max_chars `. A boundary
230
+ /// Break the input string at a boundary character around the offset `max_width `. A boundary
230
231
/// character is either a punctuation or a whitespace.
231
- fn break_string ( max_chars : usize , trim_end : bool , line_end : & str , input : & [ & str ] ) -> SnippetState {
232
+ /// FIXME(issue#3281): We must follow UAX#14 algorithm instead of this.
233
+ fn break_string ( max_width : usize , trim_end : bool , line_end : & str , input : & [ & str ] ) -> SnippetState {
232
234
let break_at = |index /* grapheme at index is included */ | {
233
235
// Take in any whitespaces to the left/right of `input[index]` while
234
236
// preserving line feeds
@@ -272,19 +274,33 @@ fn break_string(max_chars: usize, trim_end: bool, line_end: &str, input: &[&str]
272
274
}
273
275
} ;
274
276
277
+ // Find the first index `x` where the unicode width of `input[0..=x]` becomes > max_width (the last index visited if it never does)
278
+ let max_width_index_in_input = {
279
+ let mut cur_width = 0 ;
280
+ let mut cur_index = 0 ;
281
+ for ( i, grapheme) in input. iter ( ) . enumerate ( ) {
282
+ cur_width += unicode_str_width ( grapheme) ;
283
+ cur_index = i;
284
+ if cur_width > max_width {
285
+ break ;
286
+ }
287
+ }
288
+ cur_index
289
+ } ;
290
+
275
291
// Find the position in input for breaking the string
276
292
if line_end. is_empty ( )
277
293
&& trim_end
278
- && !is_whitespace ( input[ max_chars - 1 ] )
279
- && is_whitespace ( input[ max_chars ] )
294
+ && !is_whitespace ( input[ max_width_index_in_input - 1 ] )
295
+ && is_whitespace ( input[ max_width_index_in_input ] )
280
296
{
281
297
// At a breaking point already
282
298
// The line won't invalidate the rewriting because:
283
299
// - no extra space needed for the line_end character
284
300
// - extra whitespaces to the right can be trimmed
285
- return break_at ( max_chars - 1 ) ;
301
+ return break_at ( max_width_index_in_input - 1 ) ;
286
302
}
287
- if let Some ( url_index_end) = detect_url ( input, max_chars ) {
303
+ if let Some ( url_index_end) = detect_url ( input, max_width_index_in_input ) {
288
304
let index_plus_ws = url_index_end
289
305
+ input[ url_index_end..]
290
306
. iter ( )
@@ -297,27 +313,28 @@ fn break_string(max_chars: usize, trim_end: bool, line_end: &str, input: &[&str]
297
313
return SnippetState :: LineEnd ( input[ ..=index_plus_ws] . concat ( ) , index_plus_ws + 1 ) ;
298
314
} ;
299
315
}
300
- match input[ 0 ..max_chars]
316
+
317
+ match input[ 0 ..max_width_index_in_input]
301
318
. iter ( )
302
319
. rposition ( |grapheme| is_whitespace ( grapheme) )
303
320
{
304
321
// Found a whitespace and what is on its left side is big enough.
305
322
Some ( index) if index >= MIN_STRING => break_at ( index) ,
306
323
// No whitespace found, try looking for a punctuation instead
307
- _ => match input[ 0 ..max_chars ]
324
+ _ => match input[ 0 ..max_width_index_in_input ]
308
325
. iter ( )
309
326
. rposition ( |grapheme| is_punctuation ( grapheme) )
310
327
{
311
328
// Found a punctuation and what is on its left side is big enough.
312
329
Some ( index) if index >= MIN_STRING => break_at ( index) ,
313
330
// Either no boundary character was found to the left of `input[max_width_index_in_input]`, or the line
314
331
// got too small. We try searching for a boundary character to the right.
315
- _ => match input[ max_chars ..]
332
+ _ => match input[ max_width_index_in_input ..]
316
333
. iter ( )
317
334
. position ( |grapheme| is_whitespace ( grapheme) || is_punctuation ( grapheme) )
318
335
{
319
336
// A boundary was found after the line limit
320
- Some ( index) => break_at ( max_chars + index) ,
337
+ Some ( index) => break_at ( max_width_index_in_input + index) ,
321
338
// No boundary to the right, the input cannot be broken
322
339
None => SnippetState :: EndOfInput ( input. concat ( ) ) ,
323
340
} ,
@@ -335,10 +352,11 @@ fn is_whitespace(grapheme: &str) -> bool {
335
352
}
336
353
337
354
fn is_punctuation ( grapheme : & str ) -> bool {
338
- match grapheme. as_bytes ( ) [ 0 ] {
339
- b':' | b',' | b';' | b'.' => true ,
340
- _ => false ,
341
- }
355
+ grapheme. chars ( ) . all ( |c| c. is_punctuation_other ( ) )
356
+ }
357
+
358
+ fn graphemes_width ( graphemes : & [ & str ] ) -> usize {
359
+ graphemes. iter ( ) . map ( |s| unicode_str_width ( s) ) . sum ( )
342
360
}
343
361
344
362
#[ cfg( test) ]
0 commit comments