Skip to content

Commit

Permalink
Merge pull request #17 from mgeisler/hyphens-cmdline-options
Browse files Browse the repository at this point in the history
Avoid splitting on hyphens in cmdline options
  • Loading branch information
mgeisler authored Jan 21, 2017
2 parents b072b07 + f876809 commit 0416ca4
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,17 @@ impl<'a> Wrapper<'a> {
// Split on hyphens or use the language corpus.
match self.corpus {
None => {
// Split on hyphens, smallest split first.
for (n, _) in word.match_indices('-') {
let (head, tail) = word.split_at(n + 1);
result.push((head, "", tail));
// Split on hyphens, smallest split first. We only use
// hyphens that are surrounded by alphanumeric
// characters. This is to avoid splitting on repeated
// hyphens, such as those found in --foo-bar.
let char_indices = word.char_indices().collect::<Vec<_>>();
for w in char_indices.windows(3) {
let ((_, prev), (n, c), (_, next)) = (w[0], w[1], w[2]);
if prev.is_alphanumeric() && c == '-' && next.is_alphanumeric() {
let (head, tail) = word.split_at(n + 1);
result.push((head, "", tail));
}
}
}
Some(corpus) => {
Expand Down Expand Up @@ -381,6 +388,27 @@ mod tests {
assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
}

#[test]
fn hyphens_flag() {
    // A command-line flag keeps its leading "--" intact; only the hyphen
    // sitting between "foo" and "bar" is accepted as a break point.
    let lines = wrap("The --foo-bar flag.", 5);
    let expected = vec!["The", "--foo-", "bar", "flag."];
    assert_eq!(lines, expected);
}

#[test]
fn repeated_hyphens() {
    // Neither hyphen in "--" has alphanumeric characters on both sides, so
    // the word is emitted whole even though it is wider than the limit.
    let lines = wrap("foo--bar", 4);
    let expected = vec!["foo--bar"];
    assert_eq!(lines, expected);
}

#[test]
fn hyphens_alphanumeric() {
    // Digits count as alphanumeric neighbours, so the hyphen between "2"
    // and "C" is a valid place to split.
    let lines = wrap("Na2-CH4", 5);
    let expected = vec!["Na2-", "CH4"];
    assert_eq!(lines, expected);
}

#[test]
fn hyphens_non_alphanumeric() {
    // The hyphen is wrapped in parentheses rather than alphanumeric
    // characters, so no split is offered and the word stays in one piece.
    let lines = wrap("foo(-)bar", 5);
    let expected = vec!["foo(-)bar"];
    assert_eq!(lines, expected);
}

#[test]
fn multiple_splits() {
assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
Expand Down

0 comments on commit 0416ca4

Please sign in to comment.