diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index da84414b..5bca9f53 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -134,11 +134,17 @@ jobs: - name: Build fuzz targets run: cargo fuzz build - - name: Fuzz test - run: cargo fuzz run fill_first_fit -- -max_total_time=30 + - name: Fuzz test wrap_first_fit + run: cargo fuzz run wrap_first_fit -- -max_total_time=30 - - name: Minimize fuzz corpus - run: cargo fuzz cmin fill_first_fit + - name: Fuzz test wrap_optimal_fit + run: cargo fuzz run wrap_optimal_fit -- -max_total_time=30 + + - name: Minimize wrap_first_fit corpus + run: cargo fuzz cmin wrap_first_fit + + - name: Minimize wrap_optimal_fit corpus + run: cargo fuzz cmin wrap_optimal_fit binary-sizes: name: Compute binary sizes diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs index 59de964e..978883ae 100644 --- a/examples/wasm/src/lib.rs +++ b/examples/wasm/src/lib.rs @@ -145,22 +145,20 @@ impl<'a> CanvasWord<'a> { } } -const PRECISION: usize = 10; - impl textwrap::core::Fragment for CanvasWord<'_> { #[inline] - fn width(&self) -> usize { - (self.width * PRECISION as f64) as usize + fn width(&self) -> f64 { + self.width } #[inline] - fn whitespace_width(&self) -> usize { - (self.whitespace_width * PRECISION as f64) as usize + fn whitespace_width(&self) -> f64 { + self.whitespace_width } #[inline] - fn penalty_width(&self) -> usize { - (self.penalty_width * PRECISION as f64) as usize + fn penalty_width(&self) -> f64 { + self.penalty_width } } @@ -292,7 +290,7 @@ impl Into for WasmOptimalFit { #[wasm_bindgen] #[derive(Copy, Clone, Debug)] pub struct WasmOptions { - pub width: usize, + pub width: f64, pub break_words: bool, pub word_separator: WasmWordSeparator, pub word_splitter: WasmWordSplitter, @@ -304,7 +302,7 @@ pub struct WasmOptions { impl WasmOptions { #[wasm_bindgen(constructor)] pub fn new( - width: usize, + width: f64, break_words: bool, word_separator: WasmWordSeparator, 
word_splitter: WasmWordSplitter, @@ -359,19 +357,19 @@ pub fn draw_wrapped_text( .flat_map(|word| { let canvas_word = CanvasWord::from(ctx, word); if options.break_words { - canvas_word.break_apart(ctx, options.width as f64) + canvas_word.break_apart(ctx, options.width) } else { vec![canvas_word] } }) .collect::>(); - let line_lengths = [options.width * PRECISION]; + let line_lengths = [options.width]; let wrapped_words = match options.wrap_algorithm { WasmWrapAlgorithm::FirstFit => wrap_first_fit(&canvas_words, &line_lengths), WasmWrapAlgorithm::OptimalFit => { let penalties = options.optimal_fit.into(); - wrap_optimal_fit(&canvas_words, &line_lengths, &penalties) + wrap_optimal_fit(&canvas_words, &line_lengths, &penalties).unwrap() } _ => Err("WasmOptions has an invalid wrap_algorithm field")?, }; diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 0d4a5655..6080fe68 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -41,3 +41,9 @@ name = "wrap_optimal_fit" path = "fuzz_targets/wrap_optimal_fit.rs" test = false doc = false + +[[bin]] +name = "wrap_optimal_fit_usize" +path = "fuzz_targets/wrap_optimal_fit_usize.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/wrap_first_fit.rs b/fuzz/fuzz_targets/wrap_first_fit.rs index 6721aba9..bc7136ee 100644 --- a/fuzz/fuzz_targets/wrap_first_fit.rs +++ b/fuzz/fuzz_targets/wrap_first_fit.rs @@ -4,21 +4,21 @@ use libfuzzer_sys::fuzz_target; use textwrap::core; use textwrap::wrap_algorithms::wrap_first_fit; -#[derive(Arbitrary, Debug, Eq, PartialEq)] +#[derive(Arbitrary, Debug, PartialEq)] struct Word { - width: usize, - whitespace_width: usize, - penalty_width: usize, + width: f64, + whitespace_width: f64, + penalty_width: f64, } #[rustfmt::skip] impl core::Fragment for Word { - fn width(&self) -> usize { self.width } - fn whitespace_width(&self) -> usize { self.whitespace_width } - fn penalty_width(&self) -> usize { self.penalty_width } + fn width(&self) -> f64 { self.width } + fn whitespace_width(&self) -> 
f64 { self.whitespace_width } + fn penalty_width(&self) -> f64 { self.penalty_width } } -fuzz_target!(|input: (usize, Vec)| { +fuzz_target!(|input: (f64, Vec)| { let width = input.0; let words = input.1; let _ = wrap_first_fit(&words, &[width]); diff --git a/fuzz/fuzz_targets/wrap_optimal_fit.rs b/fuzz/fuzz_targets/wrap_optimal_fit.rs index 2dd89c16..87bc8f1d 100644 --- a/fuzz/fuzz_targets/wrap_optimal_fit.rs +++ b/fuzz/fuzz_targets/wrap_optimal_fit.rs @@ -25,23 +25,37 @@ impl Into for Penalties { } } -#[derive(Arbitrary, Debug, Eq, PartialEq)] +#[derive(Arbitrary, Debug, PartialEq)] struct Word { - width: usize, - whitespace_width: usize, - penalty_width: usize, + width: f64, + whitespace_width: f64, + penalty_width: f64, } #[rustfmt::skip] impl core::Fragment for Word { - fn width(&self) -> usize { self.width } - fn whitespace_width(&self) -> usize { self.whitespace_width } - fn penalty_width(&self) -> usize { self.penalty_width } + fn width(&self) -> f64 { self.width } + fn whitespace_width(&self) -> f64 { self.whitespace_width } + fn penalty_width(&self) -> f64 { self.penalty_width } } +// Check wrapping fragments with mostly arbitrary widths. Infinite +// widths are not supported since they instantly trigger an overflow +// in the cost computation. Similarly for very large values: the 1e100 +// bound used here is somewhat conservative, the real bound seems to +// be around 1e170. 
fuzz_target!(|input: (usize, Vec, Penalties)| { let width = input.0; let words = input.1; let penalties = input.2.into(); - let _ = wrap_optimal_fit(&words, &[width], &penalties); + + for word in &words { + for width in [word.width, word.whitespace_width, word.penalty_width] { + if !width.is_finite() || width.abs() > 1e100 { + return; + } + } + } + + let _ = wrap_optimal_fit(&words, &[width as f64], &penalties); }); diff --git a/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs b/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs new file mode 100644 index 00000000..4af3ba9f --- /dev/null +++ b/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs @@ -0,0 +1,49 @@ +#![no_main] +use arbitrary::Arbitrary; +use libfuzzer_sys::fuzz_target; +use textwrap::core; +use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit}; + +#[derive(Arbitrary, Debug)] +struct Penalties { + nline_penalty: usize, + overflow_penalty: usize, + short_last_line_fraction: usize, + short_last_line_penalty: usize, + hyphen_penalty: usize, +} + +impl Into for Penalties { + fn into(self) -> OptimalFit { + OptimalFit { + nline_penalty: self.nline_penalty, + overflow_penalty: self.overflow_penalty, + short_last_line_fraction: std::cmp::max(1, self.short_last_line_fraction), + short_last_line_penalty: self.short_last_line_penalty, + hyphen_penalty: self.hyphen_penalty, + } + } +} + +#[derive(Arbitrary, Debug, PartialEq)] +struct Word { + width: usize, + whitespace_width: usize, + penalty_width: usize, +} + +#[rustfmt::skip] +impl core::Fragment for Word { + fn width(&self) -> f64 { self.width as f64 } + fn whitespace_width(&self) -> f64 { self.whitespace_width as f64 } + fn penalty_width(&self) -> f64 { self.penalty_width as f64 } +} + +// Check wrapping fragments generated with integer widths. These +// fragments are of the same form as the ones generated by wrap. 
+fuzz_target!(|input: (usize, Vec, Penalties)| { + let width = input.0; + let words = input.1; + let penalties = input.2.into(); + let _ = wrap_optimal_fit(&words, &[width as f64], &penalties); +}); diff --git a/src/core.rs b/src/core.rs index 1ea18f81..6c3512ab 100644 --- a/src/core.rs +++ b/src/core.rs @@ -197,15 +197,15 @@ pub fn display_width(text: &str) -> usize { /// the displayed width of each part, which this trait provides. pub trait Fragment: std::fmt::Debug { /// Displayed width of word represented by this fragment. - fn width(&self) -> usize; + fn width(&self) -> f64; /// Displayed width of the whitespace that must follow the word /// when the word is not at the end of a line. - fn whitespace_width(&self) -> usize; + fn whitespace_width(&self) -> f64; /// Displayed width of the penalty that must be inserted if the /// word falls at the end of a line. - fn penalty_width(&self) -> usize; + fn penalty_width(&self) -> f64; } /// A piece of wrappable text, including any trailing whitespace. @@ -304,22 +304,22 @@ impl<'a> Word<'a> { impl Fragment for Word<'_> { #[inline] - fn width(&self) -> usize { - self.width + fn width(&self) -> f64 { + self.width as f64 } // We assume the whitespace consist of ' ' only. This allows us to // compute the display width in constant time. #[inline] - fn whitespace_width(&self) -> usize { - self.whitespace.len() + fn whitespace_width(&self) -> f64 { + self.whitespace.len() as f64 } // We assume the penalty is `""` or `"-"`. This allows us to // compute the display width in constant time. 
#[inline] - fn penalty_width(&self) -> usize { - self.penalty.len() + fn penalty_width(&self) -> f64 { + self.penalty.len() as f64 } } @@ -334,7 +334,7 @@ where { let mut shortened_words = Vec::new(); for word in words { - if word.width() > line_width { + if word.width() > line_width as f64 { shortened_words.extend(word.break_apart(line_width)); } else { shortened_words.push(word); diff --git a/src/lib.rs b/src/lib.rs index 1be49af8..374835df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1304,7 +1304,7 @@ pub fn fill_inplace(text: &mut String, width: usize) { let words = word_separators::AsciiSpace .find_words(line) .collect::>(); - let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width]); + let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width as f64]); let mut line_offset = offset; for words in &wrapped_words[..wrapped_words.len() - 1] { @@ -1392,19 +1392,12 @@ mod tests { } #[test] - #[cfg(not(feature = "smawk"))] fn max_width() { - // No overflow for the first-fit wrap algorithm. - assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); - } + assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]); - #[test] - #[cfg(feature = "smawk")] - #[should_panic(expected = "attempt to multiply with overflow")] - fn max_width() { - // The optimal-fit algorithm overflows for extreme line - // widths. See #247 and #416 for details.. - assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); + let text = "Hello there! This is some English text. 
\ + It should not be wrapped given the extents below."; + assert_eq!(wrap(text, usize::MAX), vec![text]); } #[test] diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index c216e708..597fd102 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -18,7 +18,7 @@ #[cfg(feature = "smawk")] mod optimal_fit; #[cfg(feature = "smawk")] -pub use optimal_fit::{wrap_optimal_fit, OptimalFit}; +pub use optimal_fit::{wrap_optimal_fit, OptimalFit, OverflowError}; use crate::core::{Fragment, Word}; @@ -93,7 +93,12 @@ impl Default for FirstFit { impl WrapAlgorithm for FirstFit { #[inline] fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { - wrap_first_fit(words, line_widths) + // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 + // = 9_007_199_254_740_992 can be represented without loss by + // a f64. Larger line widths will be rounded to the nearest + // representable number. + let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::>(); + wrap_first_fit(words, &f64_line_widths) } } @@ -132,7 +137,7 @@ impl WrapAlgorithm for FirstFit { /// /// let text = "These few words will unfortunately not wrap nicely."; /// let words = AsciiSpace.find_words(text).collect::>(); -/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15])), +/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), /// vec!["These few words", /// "will", // <-- short line /// "unfortunately", @@ -143,7 +148,7 @@ impl WrapAlgorithm for FirstFit { /// #[cfg(feature = "smawk")] /// use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit}; /// #[cfg(feature = "smawk")] -/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15], &OptimalFit::new())), +/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &OptimalFit::new()).unwrap()), /// vec!["These few", /// "words will", /// "unfortunately", @@ -178,41 +183,41 @@ impl WrapAlgorithm for FirstFit { /// #[derive(Debug)] /// struct Task<'a> { 
/// name: &'a str, -/// hours: usize, // Time needed to complete task. -/// sweep: usize, // Time needed for a quick sweep after task during the day. -/// cleanup: usize, // Time needed for full cleanup if day ends with this task. +/// hours: f64, // Time needed to complete task. +/// sweep: f64, // Time needed for a quick sweep after task during the day. +/// cleanup: f64, // Time needed for full cleanup if day ends with this task. /// } /// /// impl Fragment for Task<'_> { -/// fn width(&self) -> usize { self.hours } -/// fn whitespace_width(&self) -> usize { self.sweep } -/// fn penalty_width(&self) -> usize { self.cleanup } +/// fn width(&self) -> f64 { self.hours } +/// fn whitespace_width(&self) -> f64 { self.sweep } +/// fn penalty_width(&self) -> f64 { self.cleanup } /// } /// /// // The morning tasks /// let tasks = vec![ -/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 }, -/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 }, -/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 }, -/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 }, -/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 }, -/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 }, -/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 }, -/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 }, -/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 }, +/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 }, +/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 }, +/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 }, +/// Task 
{ name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, /// ]; /// /// // Fill tasks into days, taking `day_length` into account. The /// // output shows the hours worked per day along with the names of /// // the tasks for that day. -/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> { +/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> { /// let mut days = Vec::new(); /// // Assign tasks to days. The assignment is a vector of slices, /// // with a slice per day. /// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); /// for day in assigned_days.iter() { /// let last = day.last().unwrap(); -/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum(); +/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); /// let names = day.iter().map(|t| t.name).collect::>(); /// days.push((work_hours - last.sweep + last.cleanup, names)); /// } @@ -221,24 +226,24 @@ impl WrapAlgorithm for FirstFit { /// /// // With a single crew working 8 hours a day: /// assert_eq!( -/// assign_days(&tasks, 8), +/// assign_days(&tasks, 8.0), /// [ -/// (7, vec!["Foundation"]), -/// (8, vec!["Framing", "Plumbing"]), -/// (7, vec!["Electrical", "Insulation"]), -/// (5, vec!["Drywall"]), -/// (7, vec!["Floors", "Countertops"]), -/// (4, vec!["Bathrooms"]), +/// (7.0, vec!["Foundation"]), +/// (8.0, vec!["Framing", "Plumbing"]), +/// (7.0, vec!["Electrical", "Insulation"]), +/// (5.0, vec!["Drywall"]), +/// (7.0, vec!["Floors", "Countertops"]), +/// (4.0, vec!["Bathrooms"]), /// ] /// ); /// /// // With two crews working in shifts, 16 hours a day: /// assert_eq!( -/// assign_days(&tasks, 16), +/// assign_days(&tasks, 16.0), /// [ -/// (14, vec!["Foundation", "Framing", "Plumbing"]), -/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]), -/// (6, vec!["Countertops", "Bathrooms"]), +/// (14.0, vec!["Foundation", "Framing", "Plumbing"]), +/// (15.0, 
vec!["Electrical", "Insulation", "Drywall", "Floors"]), +/// (6.0, vec!["Countertops", "Bathrooms"]), /// ] /// ); /// ``` @@ -247,13 +252,13 @@ impl WrapAlgorithm for FirstFit { /// knows how long each step takes :-) pub fn wrap_first_fit<'a, 'b, T: Fragment>( fragments: &'a [T], - line_widths: &'b [usize], + line_widths: &'b [f64], ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. - let default_line_width = line_widths.last().copied().unwrap_or(0); + let default_line_width = line_widths.last().copied().unwrap_or(0.0); let mut lines = Vec::new(); let mut start = 0; - let mut width = 0; + let mut width = 0.0; for (idx, fragment) in fragments.iter().enumerate() { let line_width = line_widths @@ -263,10 +268,52 @@ pub fn wrap_first_fit<'a, 'b, T: Fragment>( if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { lines.push(&fragments[start..idx]); start = idx; - width = 0; + width = 0.0; } width += fragment.width() + fragment.whitespace_width(); } lines.push(&fragments[start..]); lines } + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq)] + struct Word(f64); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> f64 { self.0 } + fn whitespace_width(&self) -> f64 { 1.0 } + fn penalty_width(&self) -> f64 { 0.0 } + } + + #[test] + fn wrap_string_longer_than_f64() { + let words = vec![ + Word(1e307), + Word(2e307), + Word(3e307), + Word(4e307), + Word(5e307), + Word(6e307), + ]; + // Wrap at just under f64::MAX (~18e307). The tiny + // whitespace_widths disappear because of loss of precision. 
+ assert_eq!( + wrap_first_fit(&words, &[15e307]), + &[ + vec![ + Word(1e307), + Word(2e307), + Word(3e307), + Word(4e307), + Word(5e307) + ], + vec![Word(6e307)] + ] + ); + } +} diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index e046d587..8fe3a219 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -40,24 +40,25 @@ pub struct OptimalFit { /// /// let short = "foo "; /// let long = "x".repeat(50); + /// let length = (short.len() + long.len()) as f64; /// let fragments = vec![Word::from(short), Word::from(&long)]; /// let penalties = OptimalFit::new(); /// /// // Perfect fit, both words are on a single line with no overflow. - /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()], &penalties); + /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The words no longer fit, yet we get a single line back. While /// // the cost of overflow (`1 * 2500`) is the same as the cost of the /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` /// // which makes it cheaper to overflow than to use two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1], &penalties); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The cost of overflow would be 2 * 2500, whereas the cost of /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = /// // 3401`. We therefore get two lines. 
- /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2], &penalties); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short)], /// &[Word::from(&long)]]); /// ``` @@ -161,7 +162,14 @@ impl Default for OptimalFit { impl WrapAlgorithm for OptimalFit { #[inline] fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { - wrap_optimal_fit(words, line_widths, self) + // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 + // = 9_007_199_254_740_992 can be represented without loss by + // a f64. Larger line widths will be rounded to the nearest + // representable number. + let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::>(); + // The computation cannot overflow when the line widths are + // restricted to usize. + wrap_optimal_fit(words, &f64_line_widths, self).unwrap() } } @@ -191,6 +199,18 @@ impl LineNumbers { } } +/// Overflow error during the [`wrap_optimal_fit`] computation. +#[derive(Debug, PartialEq, Eq)] +pub struct OverflowError; + +impl std::fmt::Display for OverflowError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "wrap_optimal_fit cost computation overflowed") + } +} + +impl std::error::Error for OverflowError {} + /// Wrap abstract fragments into lines with an optimal-fit algorithm. /// /// The `line_widths` slice gives the target line width for each line @@ -264,17 +284,48 @@ impl LineNumbers { /// code by David /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py). /// +/// # Errors +/// +/// In case of an overflow during the cost computation, an `Err` is +/// returned. 
Overflows happen when fragments or lines have infinite +/// widths (`f64::INFINITY`) or if the widths are so large that the +/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()` +/// (approximately 1e154): +/// +/// ``` +/// use textwrap::core::Fragment; +/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit, OverflowError}; +/// +/// #[derive(Debug, PartialEq)] +/// struct Word(f64); +/// +/// impl Fragment for Word { +/// fn width(&self) -> f64 { self.0 } +/// fn whitespace_width(&self) -> f64 { 1.0 } +/// fn penalty_width(&self) -> f64 { 0.0 } +/// } +/// +/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is +/// // larger than f64::MAX: +/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &OptimalFit::default()), +/// Err(OverflowError)); +/// ``` +/// +/// When using fragment widths and line widths which fit inside a +/// `u64`, overflows cannot happen. This means that fragments derived +/// from a `&str` cannot cause overflows. +/// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], - line_widths: &'b [usize], + line_widths: &'b [f64], penalties: &'b OptimalFit, -) -> Vec<&'a [T]> { +) -> Result, OverflowError> { // The final line width is used for all remaining lines. 
- let default_line_width = line_widths.last().copied().unwrap_or(0); + let default_line_width = line_widths.last().copied().unwrap_or(0.0); let mut widths = Vec::with_capacity(fragments.len() + 1); - let mut width = 0; + let mut width = 0.0; widths.push(width); for fragment in fragments { width += fragment.width() + fragment.whitespace_width(); @@ -283,18 +334,18 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( let line_numbers = LineNumbers::new(fragments.len()); - let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| { + let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| { // Line number for fragment `i`. let line_number = line_numbers.get(i, minima); let line_width = line_widths .get(line_number) .copied() .unwrap_or(default_line_width); - let target_width = std::cmp::max(1, line_width); + let target_width = line_width.max(1.0); // Compute the width of a line spanning fragments[i..j] in // constant time. We need to adjust widths[j] by subtracting - // the whitespace of fragment[j-i] and then add the penalty. + // the whitespace of fragment[j-1] and then add the penalty. let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() + fragments[j - 1].penalty_width(); @@ -303,35 +354,43 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( // breaking before fragments[i]. // // First, every extra line cost NLINE_PENALTY. - let mut cost = minima[i].1 + penalties.nline_penalty; + let mut cost = minima[i].1 + penalties.nline_penalty as f64; // Next, we add a penalty depending on the line length. if line_width > target_width { // Lines that overflow get a hefty penalty. let overflow = line_width - target_width; - cost += overflow * penalties.overflow_penalty; + cost += overflow * penalties.overflow_penalty as f64; } else if j < fragments.len() { // Other lines (except for the last line) get a milder // penalty which depend on the size of the gap. 
let gap = target_width - line_width; cost += gap * gap; - } else if i + 1 == j && line_width < target_width / penalties.short_last_line_fraction { + } else if i + 1 == j + && line_width < target_width / penalties.short_last_line_fraction as f64 + { // The last line can have any size gap, but we do add a // penalty if the line is very short (typically because it // contains just a single word). - cost += penalties.short_last_line_penalty; + cost += penalties.short_last_line_penalty as f64; } // Finally, we discourage hyphens. - if fragments[j - 1].penalty_width() > 0 { + if fragments[j - 1].penalty_width() > 0.0 { // TODO: this should use a penalty value from the fragment // instead. - cost += penalties.hyphen_penalty; + cost += penalties.hyphen_penalty as f64; } cost }); + for (_, cost) in &minima { + if cost.is_infinite() { + return Err(OverflowError); + } + } + let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima)); let mut pos = fragments.len(); loop { @@ -344,5 +403,49 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( } lines.reverse(); - lines + Ok(lines) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq)] + struct Word(f64); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> f64 { self.0 } + fn whitespace_width(&self) -> f64 { 1.0 } + fn penalty_width(&self) -> f64 { 0.0 } + } + + #[test] + fn wrap_fragments_with_infinite_widths() { + let words = vec![Word(f64::INFINITY)]; + assert_eq!( + wrap_optimal_fit(&words, &[0.0], &OptimalFit::default()), + Err(OverflowError) + ); + } + + #[test] + fn wrap_fragments_with_huge_widths() { + let words = vec![Word(1e200), Word(1e250), Word(1e300)]; + assert_eq!( + wrap_optimal_fit(&words, &[1e300], &OptimalFit::default()), + Err(OverflowError) + ); + } + + #[test] + fn wrap_fragments_with_large_widths() { + // The gaps will be of the sizes between 1e25 and 1e75. This + // makes the `gap * gap` cost fit comfortably in a f64. 
+ let words = vec![Word(1e25), Word(1e50), Word(1e75)]; + assert_eq!( + wrap_optimal_fit(&words, &[1e100], &OptimalFit::default()), + Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) + ); + } }