Handle overflows in wrap_optimal_fit by divide-and-conquer

The `wrap_optimal_fit` algorithm computes the penalty for a gap as `gap * gap`. If a fragment has a size near `usize::max_value()` and if the line width is small, this computation can easily overflow. When this happened, we would previously abort or unwind. Now, we instead do the computations with checked arithmetic and detect the overflow. We then proceed to wrap the first half of the fragments by themselves. If this works, we then wrap the second half. This way, we might be able to wrap everything without overflow. Should there be a single fragment which causes the overflow by itself, this fragment is put on a line by itself. When wrapping only part of the fragments, we might of course end up with a partial last line. To fix this, we simply pop this line and re-wrap the fragments that were put onto this line. This ensures no “seams” in the wrapping. Fixes #247.
mgeisler · Dec 26, 2020 · 72c39a4 · 72c39a4
1 parent e26ef14
commit 72c39a4
Show file tree

Hide file tree

Showing 3 changed files with 110 additions and 19 deletions.
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -10,7 +10,7 @@ edition = "2018"
 cargo-fuzz = true
 
 [dependencies]
-libfuzzer-sys = "0.3"
+libfuzzer-sys = { version = "0.3", features = ["arbitrary-derive"] }
 textwrap = { path = ".." }
 
 # Prevent this from interfering with workspaces
@@ -28,3 +28,9 @@ name = "fill_first_fit"
 path = "fuzz_targets/fill_first_fit.rs"
 test = false
 doc = false
+
+[[bin]]
+name = "wrap_optimal_fit"
+path = "fuzz_targets/wrap_optimal_fit.rs"
+test = false
+doc = false
diff --git a/src/core.rs b/src/core.rs
@@ -538,7 +538,7 @@ impl LineNumbers {
  }
  }
 
- fn get(&self, i: usize, minima: &[(usize, i32)]) -> usize {
+ fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
  while self.line_numbers.borrow_mut().len() < i + 1 {
  let pos = self.line_numbers.borrow().len();
  let line_number = 1 + self.get(minima[pos].0, &minima);
@@ -551,7 +551,7 @@ impl LineNumbers {
 
 /// Per-line penalty. This is added for every line, which makes it
 /// expensive to output more lines than the minimum required.
-const NLINE_PENALTY: i32 = 1000;
+const NLINE_PENALTY: usize = 1000;
 
 /// Per-character cost for lines that overflow the target line width.
 ///
@@ -590,16 +590,16 @@ const NLINE_PENALTY: i32 = 1000;
 /// _and_ if it happens to overflow the line by exactly one character.
 /// If it overflows by more than one character, the overflow penalty
 /// will quickly outgrow the cost of the gap, as seen above.
-const OVERFLOW_PENALTY: i32 = 50 * 50;
+const OVERFLOW_PENALTY: usize = 50 * 50;
 
 /// The last line is short if it is less than 1/4 of the target width.
 const SHORT_LINE_FRACTION: usize = 4;
 
 /// Penalize a short last line.
-const SHORT_LAST_LINE_PENALTY: i32 = 25;
+const SHORT_LAST_LINE_PENALTY: usize = 25;
 
 /// Penalty for lines ending with a hyphen.
-const HYPHEN_PENALTY: i32 = 25;
+const HYPHEN_PENALTY: usize = 25;
 
 /// Wrap abstract fragments into lines with an optimal-fit algorithm.
 ///
@@ -675,6 +675,50 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
  fragments: &'a [T],
  line_widths: F,
 ) -> Vec<&'a [T]> {
+ let mut min_idx = 0;
+ let mut max_idx = fragments.len();
+
+ let mut result = Vec::new();
+
+ loop {
+ match wrap_optimal_fit_checked(&fragments[min_idx..max_idx], &line_widths) {
+ Some(lines) => {
+ let potentially_partial_last_line = lines.len() > 1;
+ result.extend(lines);
+ if max_idx == fragments.len() {
+ return result; // All done!
+ }
+
+ min_idx = max_idx;
+ max_idx = fragments.len();
+
+ if potentially_partial_last_line {
+ let last_line = result.pop().unwrap();
+ min_idx -= last_line.len();
+ }
+ }
+ None => {
+ if max_idx - min_idx < 2 {
+ // This single fragment is causing an overflow, so
+ // we put it on its own line.
+ result.push(&fragments[min_idx..max_idx]);
+ min_idx = max_idx;
+ max_idx = fragments.len();
+ } else {
+ max_idx = min_idx + (max_idx - min_idx) / 2;
+ }
+ }
+ }
+ }
+}
+
+/// Wrap abstract fragments into lines with an optimal-fit algorithm.
+/// Returns `None` if an overflow occurs during the penalty
+/// computations. See [`wrap_optimal_fit`].
+fn wrap_optimal_fit_checked<'a, T: Fragment, F: Fn(usize) -> usize>(
+ fragments: &'a [T],
+ line_widths: F,
+) -> Option<Vec<&'a [T]>> {
  let mut widths = Vec::with_capacity(fragments.len() + 1);
  let mut width = 0;
  widths.push(width);
@@ -683,53 +727,79 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
  widths.push(width);
  }
 
+ if widths.last() < Some(&line_widths(0)) {
+ return Some(vec![fragments]);
+ }
+
+ // The cost computation below panics if it encounters a line where
+ // the gap is larger than `usize::max_value() / 2`.
+
  let line_numbers = LineNumbers::new(fragments.len());
+ let detected_overflow = RefCell::new(false);
 
- let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
+ let cost_fn = |minima: &[(usize, usize)], i, j| -> Option<usize> {
  // Line number for fragment `i`.
  let line_number = line_numbers.get(i, &minima);
  let target_width = std::cmp::max(1, line_widths(line_number));
 
  // Compute the width of a line spanning fragments[i..j] in
  // constant time. We need to adjust widths[j] by subtracting
  // the whitespace of fragments[j - 1] and then add the penalty.
- let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
- + fragments[j - 1].penalty_width();
+ let last_fragment: &T = &fragments[j - 1];
+ let line_width = widths[j] - widths[i] - last_fragment.whitespace_width()
+ + last_fragment.penalty_width();
 
  // We compute cost of the line containing fragments[i..j]. We
  // start with minima[i].1, which is the optimal cost for
  // breaking before fragments[i].
  //
  // First, every extra line costs NLINE_PENALTY.
- let mut cost = minima[i].1 + NLINE_PENALTY;
+ let mut cost = minima[i].1.checked_add(NLINE_PENALTY)?;
 
  // Next, we add a penalty depending on the line length.
  if line_width > target_width {
  // Lines that overflow get a hefty penalty.
- let overflow = (line_width - target_width) as i32;
- cost += overflow * OVERFLOW_PENALTY;
+ let overflow: usize = line_width - target_width;
+ cost = cost.checked_add(overflow.checked_mul(OVERFLOW_PENALTY)?)?;
  } else if j < fragments.len() {
  // Other lines (except for the last line) get a milder
  // penalty which depends on the size of the gap.
- let gap = (target_width - line_width) as i32;
- cost += gap * gap;
+ let gap: usize = target_width - line_width;
+ cost = cost.checked_add(gap.checked_mul(gap)?)?;
  } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
  // The last line can have any size gap, but we do add a
  // penalty if the line is very short (typically because it
  // contains just a single word).
- cost += SHORT_LAST_LINE_PENALTY;
+ cost = cost.checked_add(SHORT_LAST_LINE_PENALTY)?;
  }
 
  // Finally, we discourage hyphens.
  if fragments[j - 1].penalty_width() > 0 {
  // TODO: this should use a penalty value from the fragment
  // instead.
- cost += HYPHEN_PENALTY;
+ cost = cost.checked_add(HYPHEN_PENALTY)?;
  }
 
- cost
+ Some(cost)
+ };
+
+ let minima = smawk::online_column_minima(0, widths.len(), |minima: &[(usize, usize)], i, j| {
+ if *detected_overflow.borrow() {
+ return 0;
+ }
+ match cost_fn(minima, i, j) {
+ Some(cost) => cost,
+ None => {
+ *detected_overflow.borrow_mut() = true;
+ 0
+ }
+ }
  });
 
+ if detected_overflow.into_inner() {
+ return None;
+ }
+
  let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
  let mut pos = fragments.len();
  loop {
@@ -742,7 +812,7 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
  }
 
  lines.reverse();
- lines
+ Some(lines)
 }
 
 #[cfg(test)]
@@ -927,4 +997,14 @@ mod tests {
  ]
  );
  }
+
+ #[derive(Debug, Eq, PartialEq)]
+ struct BoxGluePenalty(usize);
+
+ #[rustfmt::skip]
+ impl Fragment for BoxGluePenalty {
+ fn width(&self) -> usize { self.0 }
+ fn whitespace_width(&self) -> usize { 1 }
+ fn penalty_width(&self) -> usize { 0 }
+ }
 }
diff --git a/src/lib.rs b/src/lib.rs
@@ -850,10 +850,15 @@ mod tests {
  }
 
  #[test]
- fn max_width() {
+ fn max_width_usize() {
  assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
  }
 
+ #[test]
+ fn max_width_usize_issue_247() {
+ assert_eq!(wrap("x y", 515566821223), vec!["x y"]);
+ }
+
  #[test]
  fn leading_whitespace() {
  assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);