Skip to content

Commit

Permalink
Handle overflows in wrap_optimal_fit by divide-and-conquer
Browse files Browse the repository at this point in the history
The `wrap_optimal_fit` algorithm computes the penalty for a gap as
`gap * gap`. If a fragment has a size near `usize::max_value()` and if
the line width is small, this computation can easily overflow.

When this happened, we would previously abort or unwind. Now, we
instead do the computations with checked arithmetic and detect the
overflow. We then proceed to wrap the first half of the fragments by
themselves. If this works, we then wrap the second half. This way, we
might be able to wrap everything without overflow.

Should there be a single fragment which causes the overflow by itself,
this fragment is put on a line by itself.

When wrapping part of the fragments, we might of course end up with a
partial last line. To fix this, we simply pop this line and re-wrap
the fragments that were put onto this line. This ensures no “seams” in
the wrapping.

Fixes #247.
  • Loading branch information
mgeisler committed Dec 26, 2020
1 parent e26ef14 commit 72c39a4
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 19 deletions.
8 changes: 7 additions & 1 deletion fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ edition = "2018"
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.3"
libfuzzer-sys = { version = "0.3", features = ["arbitrary-derive"] }
textwrap = { path = ".." }

# Prevent this from interfering with workspaces
Expand All @@ -28,3 +28,9 @@ name = "fill_first_fit"
path = "fuzz_targets/fill_first_fit.rs"
test = false
doc = false

[[bin]]
name = "wrap_optimal_fit"
path = "fuzz_targets/wrap_optimal_fit.rs"
test = false
doc = false
114 changes: 97 additions & 17 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ impl LineNumbers {
}
}

fn get(&self, i: usize, minima: &[(usize, i32)]) -> usize {
fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
while self.line_numbers.borrow_mut().len() < i + 1 {
let pos = self.line_numbers.borrow().len();
let line_number = 1 + self.get(minima[pos].0, &minima);
Expand All @@ -551,7 +551,7 @@ impl LineNumbers {

/// Per-line penalty. This is added for every line, which makes it
/// expensive to output more lines than the minimum required.
const NLINE_PENALTY: i32 = 1000;
const NLINE_PENALTY: usize = 1000;

/// Per-character cost for lines that overflow the target line width.
///
Expand Down Expand Up @@ -590,16 +590,16 @@ const NLINE_PENALTY: i32 = 1000;
/// _and_ if it happens to overflow the line by exactly one character.
/// If it overflows by more than one character, the overflow penalty
/// will quickly outgrow the cost of the gap, as seen above.
const OVERFLOW_PENALTY: i32 = 50 * 50;
const OVERFLOW_PENALTY: usize = 50 * 50;

/// The last line is short if it is less than 1/4 of the target width.
const SHORT_LINE_FRACTION: usize = 4;

/// Penalize a short last line.
const SHORT_LAST_LINE_PENALTY: i32 = 25;
const SHORT_LAST_LINE_PENALTY: usize = 25;

/// Penalty for lines ending with a hyphen.
const HYPHEN_PENALTY: i32 = 25;
const HYPHEN_PENALTY: usize = 25;

/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
Expand Down Expand Up @@ -675,6 +675,50 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
fragments: &'a [T],
line_widths: F,
) -> Vec<&'a [T]> {
let mut min_idx = 0;
let mut max_idx = fragments.len();

let mut result = Vec::new();

loop {
match wrap_optimal_fit_checked(&fragments[min_idx..max_idx], &line_widths) {
Some(lines) => {
let potentially_partial_last_line = lines.len() > 1;
result.extend(lines);
if max_idx == fragments.len() {
return result; // All done!
}

min_idx = max_idx;
max_idx = fragments.len();

if potentially_partial_last_line {
let last_line = result.pop().unwrap();
min_idx -= last_line.len();
}
}
None => {
if max_idx - min_idx < 2 {
// This single fragment is causing an overflow, so
// we put it on its own line.
result.push(&fragments[min_idx..max_idx]);
min_idx = max_idx;
max_idx = fragments.len();
} else {
max_idx = min_idx + (max_idx - min_idx) / 2;
}
}
}
}
}

/// Wrap abstract fragments into lines with an optimal-fit algorithm.
/// Returns `None` if an overflow occurs during the penalty
/// computations. See [`wrap_optimal_fit`].
fn wrap_optimal_fit_checked<'a, T: Fragment, F: Fn(usize) -> usize>(
fragments: &'a [T],
line_widths: F,
) -> Option<Vec<&'a [T]>> {
let mut widths = Vec::with_capacity(fragments.len() + 1);
let mut width = 0;
widths.push(width);
Expand All @@ -683,53 +727,79 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
widths.push(width);
}

if widths.last() < Some(&line_widths(0)) {
return Some(vec![fragments]);
}

// The cost computation below panics if it encounters a line where
// the gap is larger than `usize::max_value() / 2`.

let line_numbers = LineNumbers::new(fragments.len());
let detected_overflow = RefCell::new(false);

let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
let cost_fn = |minima: &[(usize, usize)], i, j| -> Option<usize> {
// Line number for fragment `i`.
let line_number = line_numbers.get(i, &minima);
let target_width = std::cmp::max(1, line_widths(line_number));

// Compute the width of a line spanning fragments[i..j] in
// constant time. We need to adjust widths[j] by subtracting
// the whitespace of fragments[j - 1] and then add the penalty.
let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
+ fragments[j - 1].penalty_width();
let last_fragment: &T = &fragments[j - 1];
let line_width = widths[j] - widths[i] - last_fragment.whitespace_width()
+ last_fragment.penalty_width();

// We compute the cost of the line containing fragments[i..j].
// We start with minima[i].1, which is the optimal cost for
// breaking before fragments[i].
//
// First, every extra line costs NLINE_PENALTY.
let mut cost = minima[i].1 + NLINE_PENALTY;
let mut cost = minima[i].1.checked_add(NLINE_PENALTY)?;

// Next, we add a penalty depending on the line length.
if line_width > target_width {
// Lines that overflow get a hefty penalty.
let overflow = (line_width - target_width) as i32;
cost += overflow * OVERFLOW_PENALTY;
let overflow: usize = line_width - target_width;
cost = cost.checked_add(overflow.checked_mul(OVERFLOW_PENALTY)?)?;
} else if j < fragments.len() {
// Other lines (except for the last line) get a milder
// penalty which depends on the size of the gap.
let gap = (target_width - line_width) as i32;
cost += gap * gap;
let gap: usize = target_width - line_width;
cost = cost.checked_add(gap.checked_mul(gap)?)?;
} else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
// The last line can have any size gap, but we do add a
// penalty if the line is very short (typically because it
// contains just a single word).
cost += SHORT_LAST_LINE_PENALTY;
cost = cost.checked_add(SHORT_LAST_LINE_PENALTY)?;
}

// Finally, we discourage hyphens.
if fragments[j - 1].penalty_width() > 0 {
// TODO: this should use a penalty value from the fragment
// instead.
cost += HYPHEN_PENALTY;
cost = cost.checked_add(HYPHEN_PENALTY)?;
}

cost
Some(cost)
};

let minima = smawk::online_column_minima(0, widths.len(), |minima: &[(usize, usize)], i, j| {
if *detected_overflow.borrow() {
return 0;
}
match cost_fn(minima, i, j) {
Some(cost) => cost,
None => {
*detected_overflow.borrow_mut() = true;
0
}
}
});

if detected_overflow.into_inner() {
return None;
}

let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
let mut pos = fragments.len();
loop {
Expand All @@ -742,7 +812,7 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
}

lines.reverse();
lines
Some(lines)
}

#[cfg(test)]
Expand Down Expand Up @@ -927,4 +997,14 @@ mod tests {
]
);
}

/// Minimal `Fragment` implementation for tests: a tuple struct whose
/// single `usize` field is reported as the fragment width.
#[derive(Debug, Eq, PartialEq)]
struct BoxGluePenalty(usize);

// The one-line method bodies below are kept compact on purpose, hence
// the `rustfmt::skip` attribute.
#[rustfmt::skip]
impl Fragment for BoxGluePenalty {
// The width is the wrapped value.
fn width(&self) -> usize { self.0 }
// Every fragment reports a trailing whitespace width of exactly one.
fn whitespace_width(&self) -> usize { 1 }
// No penalty width, so the hyphen penalty never applies to it.
fn penalty_width(&self) -> usize { 0 }
}
}
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,10 +850,15 @@ mod tests {
}

#[test]
fn max_width() {
fn max_width_usize() {
assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
}

#[test]
fn max_width_usize_issue_247() {
assert_eq!(wrap("x y", 515566821223), vec!["x y"]);
}

#[test]
fn leading_whitespace() {
assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
Expand Down

0 comments on commit 72c39a4

Please sign in to comment.