diff --git a/benches/linear.rs b/benches/linear.rs index 046a956e..9d7eef0f 100644 --- a/benches/linear.rs +++ b/benches/linear.rs @@ -32,7 +32,7 @@ pub fn benchmark(c: &mut Criterion) { { let options = textwrap::Options::new(LINE_LENGTH) .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit) - .word_separator(textwrap::UnicodeBreakProperties); + .word_separator(textwrap::word_separators::UnicodeBreakProperties); group.bench_with_input( BenchmarkId::new("fill_optimal_fit_unicode", length), &text, @@ -44,7 +44,7 @@ pub fn benchmark(c: &mut Criterion) { let options = textwrap::Options::new(LINE_LENGTH) .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit) - .word_separator(textwrap::AsciiSpace); + .word_separator(textwrap::word_separators::AsciiSpace); group.bench_with_input( BenchmarkId::new("fill_optimal_fit_ascii", length), &text, @@ -56,7 +56,7 @@ pub fn benchmark(c: &mut Criterion) { let options = textwrap::Options::new(LINE_LENGTH) .wrap_algorithm(textwrap::wrap_algorithms::FirstFit) - .word_separator(textwrap::AsciiSpace); + .word_separator(textwrap::word_separators::AsciiSpace); group.bench_with_input( BenchmarkId::new("fill_first_fit", length), &text, diff --git a/examples/interactive.rs b/examples/interactive.rs index a75a7fc4..7e655ee7 100644 --- a/examples/interactive.rs +++ b/examples/interactive.rs @@ -19,9 +19,8 @@ mod unix_only { use termion::raw::{IntoRawMode, RawTerminal}; use termion::screen::AlternateScreen; use termion::{color, cursor, style}; - use textwrap::wrap_algorithms; - use textwrap::{wrap, AsciiSpace, Options, WordSeparator}; - use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter}; + use textwrap::{word_separators, word_splitters, wrap_algorithms}; + use textwrap::{wrap, Options}; #[cfg(feature = "hyphenation")] use hyphenation::{Language, Load, Standard}; @@ -59,8 +58,8 @@ mod unix_only { options: &Options< 'a, Box, - Box, - Box, + Box, + Box, >, splitter_label: &str, stdout: &mut RawTerminal, @@ -238,8 +237,10 @@ mod unix_only { 
#[cfg(feature = "smawk")] wrap_algorithms.push(Box::new(wrap_algorithms::OptimalFit)); - let mut splitters: Vec> = - vec![Box::new(HyphenSplitter), Box::new(NoHyphenation)]; + let mut splitters: Vec> = vec![ + Box::new(word_splitters::HyphenSplitter), + Box::new(word_splitters::NoHyphenation), + ]; let mut splitter_labels: Vec = splitters.iter().map(|s| format!("{:?}", s)).collect(); @@ -266,7 +267,9 @@ mod unix_only { .break_words(false) .wrap_algorithm(wrap_algorithms.remove(0)) .splitter(splitters.remove(0)) - .word_separator(Box::new(AsciiSpace) as Box); + .word_separator( + Box::new(word_separators::AsciiSpace) as Box + ); let mut splitter_label = splitter_labels.remove(0); let args = std::env::args().collect::>(); diff --git a/examples/layout.rs b/examples/layout.rs index a2bd9108..ddaf543d 100644 --- a/examples/layout.rs +++ b/examples/layout.rs @@ -1,4 +1,5 @@ -use textwrap::{wrap, HyphenSplitter, Options, WordSplitter}; +use textwrap::word_splitters::{HyphenSplitter, WordSplitter}; +use textwrap::{wrap, Options}; fn main() { let example = "Memory safety without garbage collection. 
\ diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs index 92c94ba8..67869abb 100644 --- a/examples/wasm/src/lib.rs +++ b/examples/wasm/src/lib.rs @@ -3,6 +3,8 @@ use wasm_bindgen::prelude::*; use wasm_bindgen::JsCast; use textwrap::core; +use textwrap::word_separators::{AsciiSpace, UnicodeBreakProperties, WordSeparator}; +use textwrap::word_splitters::{split_words, HyphenSplitter, NoHyphenation, WordSplitter}; use textwrap::wrap_algorithms::{wrap_first_fit, wrap_optimal_fit}; #[wasm_bindgen] @@ -292,22 +294,22 @@ pub fn draw_wrapped_text( let line_height = metrics.actual_bounding_box_ascent() + metrics.actual_bounding_box_descent(); let baseline_distance = 1.5 * line_height; - let word_separator: Box = match options.word_separator { - WasmWordSeparator::AsciiSpace => Box::new(textwrap::AsciiSpace), - WasmWordSeparator::UnicodeBreakProperties => Box::new(textwrap::UnicodeBreakProperties), + let word_separator: Box = match options.word_separator { + WasmWordSeparator::AsciiSpace => Box::new(AsciiSpace), + WasmWordSeparator::UnicodeBreakProperties => Box::new(UnicodeBreakProperties), _ => Err("WasmOptions has an invalid word_separator field")?, }; - let word_splitter: Box = match options.word_splitter { - WasmWordSplitter::NoHyphenation => Box::new(textwrap::NoHyphenation), - WasmWordSplitter::HyphenSplitter => Box::new(textwrap::HyphenSplitter), + let word_splitter: Box = match options.word_splitter { + WasmWordSplitter::NoHyphenation => Box::new(NoHyphenation), + WasmWordSplitter::HyphenSplitter => Box::new(HyphenSplitter), _ => Err("WasmOptions has an invalid word_splitter field")?, }; let mut lineno = 0; for line in text.split('\n') { let words = word_separator.find_words(line); - let split_words = core::split_words(words, &word_splitter); + let split_words = split_words(words, &word_splitter); let canvas_words = split_words .flat_map(|word| { diff --git a/src/core.rs b/src/core.rs index 486f2c96..af024603 100644 --- a/src/core.rs +++ b/src/core.rs 
@@ -9,12 +9,14 @@ //! //! 1. Split your input into [`Fragment`]s. These are abstract blocks //! of text or content which can be wrapped into lines. See -//! [`WordSeparator`](crate::WordSeparator) for how to do this for -//! text. +//! [`WordSeparator`](crate::word_separators::WordSeparator) for +//! how to do this for text. //! //! 2. Potentially split your fragments into smaller pieces. This -//! allows you to implement things like hyphenation. If wrapping -//! text, [`split_words`] can help you do this. +//! allows you to implement things like hyphenation. If you are +//! wrapping text represented as a sequence of [`Word`]s, then +//! [`split_words`](crate::word_splitters::split_words) can +//! help you do this. //! //! 3. Potentially break apart fragments that are still too large to //! fit on a single line. This is implemented in [`break_words`]. @@ -33,8 +35,6 @@ //! the functionality here is not sufficient or if you have ideas for //! improving it. We would love to hear from you! -use crate::WordSplitter; - /// The CSI or “Control Sequence Introducer” introduces an ANSI escape /// sequence. This is typically used for colored text and will be /// ignored when computing the text width. @@ -221,7 +221,7 @@ pub struct Word<'a> { /// Penalty string to insert if the word falls at the end of a line. pub penalty: &'a str, // Cached width in columns. - width: usize, + pub(crate) width: usize, } impl std::ops::Deref for Word<'_> { @@ -323,70 +323,6 @@ impl Fragment for Word<'_> { } } -/// Split words into smaller words according to the split points given -/// by `options`. -/// -/// Note that we split all words, regardless of their length. This is -/// to more cleanly separate the business of splitting (including -/// automatic hyphenation) from the business of word wrapping. 
-/// -/// # Examples -/// -/// ``` -/// use textwrap::core::{split_words, Word}; -/// use textwrap::{NoHyphenation, HyphenSplitter}; -/// -/// assert_eq!( -/// split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::>(), -/// vec![Word::from("foo-"), Word::from("bar")] -/// ); -/// -/// // The NoHyphenation splitter ignores the '-': -/// assert_eq!( -/// split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::>(), -/// vec![Word::from("foo-bar")] -/// ); -/// ``` -pub fn split_words<'a, I, WordSplit>( - words: I, - word_splitter: &'a WordSplit, -) -> impl Iterator> -where - I: IntoIterator>, - WordSplit: WordSplitter, -{ - words.into_iter().flat_map(move |word| { - let mut prev = 0; - let mut split_points = word_splitter.split_points(&word).into_iter(); - std::iter::from_fn(move || { - if let Some(idx) = split_points.next() { - let need_hyphen = !word[..idx].ends_with('-'); - let w = Word { - word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), - whitespace: "", - penalty: if need_hyphen { "-" } else { "" }, - }; - prev = idx; - return Some(w); - } - - if prev < word.word.len() || prev == 0 { - let w = Word { - word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, - }; - prev = word.word.len() + 1; - return Some(w); - } - - None - }) - }) -} - /// Forcibly break words wider than `line_width` into smaller words. /// /// This simply calls [`Word::break_apart`] on words that are too @@ -410,18 +346,10 @@ where #[cfg(test)] mod tests { use super::*; - use crate::HyphenSplitter; #[cfg(feature = "unicode-width")] use unicode_width::UnicodeWidthChar; - // Like assert_eq!, but the left expression is an iterator. - macro_rules! 
assert_iter_eq { - ($left:expr, $right:expr) => { - assert_eq!($left.collect::>(), $right); - }; - } - #[test] fn skip_ansi_escape_sequence_works() { let blue_text = "\u{1b}[34mHello\u{1b}[0m"; @@ -503,80 +431,4 @@ mod tests { fn display_width_emojis() { assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); } - - #[test] - fn split_words_no_words() { - assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]); - } - - #[test] - fn split_words_empty_word() { - assert_iter_eq!( - split_words(vec![Word::from(" ")], &HyphenSplitter), - vec![Word::from(" ")] - ); - } - - #[test] - fn split_words_single_word() { - assert_iter_eq!( - split_words(vec![Word::from("foobar")], &HyphenSplitter), - vec![Word::from("foobar")] - ); - } - - #[test] - fn split_words_hyphen_splitter() { - assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &HyphenSplitter), - vec![Word::from("foo-"), Word::from("bar")] - ); - } - - #[test] - fn split_words_adds_penalty() { - #[derive(Clone, Debug)] - struct FixedSplitPoint; - impl WordSplitter for FixedSplitPoint { - fn split_points(&self, _: &str) -> Vec { - vec![3] - } - } - - assert_iter_eq!( - split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint), - vec![ - Word { - word: "foo", - width: 3, - whitespace: "", - penalty: "-" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - - assert_iter_eq!( - split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint), - vec![ - Word { - word: "fo-", - width: 3, - whitespace: "", - penalty: "" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - } } diff --git a/src/lib.rs b/src/lib.rs index bf494384..9326b8ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -138,8 +138,8 @@ //! This feature can be disabled if you are happy to find words //! separated by ASCII space characters only. People wrapping text //! with emojis or East-Asian characters will want most likely want -//! to enable this feature. 
See the [`WordSeparator`] trait for -//! details. +//! to enable this feature. See the +//! [`word_separators::WordSeparator`] trait for details. //! //! * `unicode-width`: enables correct width computation of non-ASCII //! characters via the [unicode-width] crate. Without this feature, @@ -168,7 +168,7 @@ //! [`Options::with_termwidth`] constructor for details. //! //! * `hyphenation`: enables language-sensitive hyphenation via the -//! [hyphenation] crate. See the [`WordSplitter`] trait for details. +//! [hyphenation] crate. See the [`word_splitters::WordSplitter`] trait for details. //! //! [unicode-linebreak]: https://docs.rs/unicode-linebreak/ //! [unicode-width]: https://docs.rs/unicode-width/ @@ -189,16 +189,10 @@ mod indentation; pub use crate::indentation::dedent; pub use crate::indentation::indent; -mod splitting; -pub use crate::splitting::{HyphenSplitter, NoHyphenation, WordSplitter}; - +pub mod word_separators; +pub mod word_splitters; pub mod wrap_algorithms; -mod word_separator; -#[cfg(feature = "unicode-linebreak")] -pub use word_separator::UnicodeBreakProperties; -pub use word_separator::{AsciiSpace, WordSeparator}; - pub mod core; // These private macros lets us hide the actual WrapAlgorithm and @@ -220,14 +214,14 @@ macro_rules! DefaultWrapAlgorithm { #[cfg(feature = "unicode-linebreak")] macro_rules! DefaultWordSeparator { () => { - UnicodeBreakProperties + word_separators::UnicodeBreakProperties }; } #[cfg(not(feature = "unicode-linebreak"))] macro_rules! DefaultWordSeparator { () => { - AsciiSpace + word_separators::AsciiSpace }; } @@ -236,8 +230,8 @@ macro_rules! DefaultWordSeparator { pub struct Options< 'a, WrapAlgo = Box, - WordSep = Box, - WordSplit = Box, + WordSep = Box, + WordSplit = Box, > { /// The width in columns at which the text will be wrapped. pub width: usize, @@ -254,13 +248,14 @@ pub struct Options< /// Wrapping algorithm to use, see the implementations of the /// [`wrap_algorithms::WrapAlgorithm`] trait for details. 
pub wrap_algorithm: WrapAlgo, - /// The line breaking algorithm to use, see [`WordSeparator`] - /// trait for an overview and possible implementations. + /// The line breaking algorithm to use, see + /// [`word_separators::WordSeparator`] trait for an overview and + /// possible implementations. pub word_separator: WordSep, /// The method for splitting words. This can be used to prohibit /// splitting words on hyphens, or it can be used to implement /// language-aware machine hyphenation. Please see the - /// [`WordSplitter`] trait for details. + /// [`word_splitters::WordSplitter`] trait for details. pub splitter: WordSplit, } @@ -285,7 +280,12 @@ where } impl<'a> From - for Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSplitter> + for Options< + 'a, + DefaultWrapAlgorithm!(), + DefaultWordSeparator!(), + word_splitters::HyphenSplitter, + > { fn from(width: usize) -> Self { Options::new(width) @@ -293,14 +293,16 @@ impl<'a> From } /// Constructors for boxed Options, specifically. -impl<'a> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSplitter> { +impl<'a> + Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), word_splitters::HyphenSplitter> +{ /// Creates a new [`Options`] with the specified width and static - /// dispatch using the [`HyphenSplitter`]. Equivalent to + /// dispatch using the [`word_splitters::HyphenSplitter`]. 
+ /// Equivalent to /// /// ``` - /// # use textwrap::{AsciiSpace, Options, HyphenSplitter, WordSplitter}; - /// # #[cfg(feature = "unicode-linebreak")] - /// # use textwrap::UnicodeBreakProperties; + /// # use textwrap::word_splitters::{HyphenSplitter, WordSplitter}; + /// # use textwrap::Options; /// # let width = 80; /// # let actual = Options::new(width); /// # let expected = @@ -310,14 +312,14 @@ impl<'a> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSpl /// subsequent_indent: "", /// break_words: true, /// #[cfg(feature = "unicode-linebreak")] - /// word_separator: UnicodeBreakProperties, + /// word_separator: textwrap::word_separators::UnicodeBreakProperties, /// #[cfg(not(feature = "unicode-linebreak"))] - /// word_separator: AsciiSpace, + /// word_separator: textwrap::word_separators::AsciiSpace, /// #[cfg(feature = "smawk")] /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit, /// #[cfg(not(feature = "smawk"))] /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit, - /// splitter: HyphenSplitter, + /// splitter: textwrap::word_splitters::HyphenSplitter, /// } /// # ; /// # assert_eq!(actual.width, expected.width); @@ -336,21 +338,24 @@ impl<'a> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSpl /// /// Dynamic dispatch on the other hand, means that the word /// separator and/or splitter is stored as a trait object such as - /// a `Box`. This way the splitter's inner type - /// can be changed without changing the type of this struct, which - /// then would be just `Options` as a short cut for - /// `Options, Box>`. + /// a `Box`. This way the + /// splitter's inner type can be changed without changing the type + /// of this struct, which then would be just `Options` as a short + /// cut for `Options, + /// Box>`. /// /// The value and type of the splitter can be choose from the /// start using the [`Options::with_splitter`] constructor or /// changed afterwards using the [`Options::splitter`] method. 
/// Whether static or dynamic dispatch is used, depends on whether - /// these functions are given a boxed [`WordSplitter`] or not. + /// these functions are given a boxed [`word_splitters::WordSplitter`] or not. /// Take for example: /// /// ``` - /// use textwrap::{HyphenSplitter, NoHyphenation, Options}; - /// # use textwrap::{AsciiSpace, WordSplitter}; + /// use textwrap::Options; + /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation}; + /// # use textwrap::word_splitters::WordSplitter; + /// # use textwrap::word_separators::AsciiSpace; /// # let width = 80; /// /// // uses HyphenSplitter with static dispatch @@ -362,11 +367,11 @@ impl<'a> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSpl /// let opt = Options::new(width).splitter(NoHyphenation); /// /// // uses HyphenSplitter with dynamic dispatch - /// // the actual type: Options> + /// // the actual type: Options> /// let opt: Options<_, _, _> = Options::new(width).splitter(Box::new(HyphenSplitter)); /// /// // uses NoHyphenation with dynamic dispatch - /// // the actual type: Options> + /// // the actual type: Options> /// let opt: Options<_, _, _> = Options::new(width).splitter(Box::new(NoHyphenation)); /// ``` /// @@ -375,7 +380,7 @@ impl<'a> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSpl /// allows to change the splitter at run-time without changing the /// variables type. pub const fn new(width: usize) -> Self { - Options::with_splitter(width, HyphenSplitter) + Options::with_splitter(width, word_splitters::HyphenSplitter) } /// Creates a new [`Options`] with `width` set to the current @@ -406,9 +411,8 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// splitter. 
Equivalent to /// /// ``` - /// # use textwrap::{AsciiSpace, Options, NoHyphenation, HyphenSplitter}; - /// # #[cfg(feature = "unicode-linebreak")] - /// # use textwrap::UnicodeBreakProperties; + /// # use textwrap::Options; + /// # use textwrap::word_splitters::{NoHyphenation, HyphenSplitter}; /// # const splitter: NoHyphenation = NoHyphenation; /// # const width: usize = 80; /// # let actual = Options::with_splitter(width, splitter); @@ -419,9 +423,9 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// subsequent_indent: "", /// break_words: true, /// #[cfg(feature = "unicode-linebreak")] - /// word_separator: UnicodeBreakProperties, + /// word_separator: textwrap::word_separators::UnicodeBreakProperties, /// #[cfg(not(feature = "unicode-linebreak"))] - /// word_separator: textwrap::AsciiSpace, + /// word_separator: textwrap::word_separators::AsciiSpace, /// #[cfg(feature = "smawk")] /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit, /// #[cfg(not(feature = "smawk"))] @@ -442,14 +446,15 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// dynamic dispatch: /// /// ``` - /// use textwrap::{HyphenSplitter, NoHyphenation, Options}; - /// # use textwrap::{WordSplitter}; + /// use textwrap::Options; + /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter}; /// # const width: usize = 80; /// /// // This opt contains a boxed trait object as splitter. /// // The type annotation is important, otherwise it will be not a trait object - /// let mut opt: Options<_, _, Box> = Options::with_splitter(width, Box::new(NoHyphenation)); - /// // Its type is actually: `Options>`: + /// let mut opt: Options<_, _, Box> + /// = Options::with_splitter(width, Box::new(NoHyphenation)); + /// // Its type is actually: `Options>`: /// let opt_coerced: Options<_, _, Box> = opt; /// /// // Thus, it can be overridden with a different splitter. 
@@ -467,7 +472,8 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// context: /// /// ``` - /// use textwrap::{HyphenSplitter, Options, AsciiSpace}; + /// use textwrap::word_splitters::HyphenSplitter; use textwrap::{ Options}; + /// use textwrap::word_separators::AsciiSpace; /// use textwrap::wrap_algorithms::FirstFit; /// # const width: usize = 80; /// @@ -581,7 +587,7 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> /// Change [`self.word_separator`]. /// - /// See [`WordSeparator`] for details on the choices. + /// See [`word_separators::WordSeparator`] for details on the choices. /// /// [`self.word_separator`]: #structfield.word_separator pub fn word_separator( @@ -620,7 +626,7 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> } } - /// Change [`self.splitter`]. The [`WordSplitter`] is used to fit + /// Change [`self.splitter`]. The [`word_splitters::WordSplitter`] is used to fit /// part of a word into the current line when wrapping text. /// /// This function may return a different type than `Self`. That is @@ -629,7 +635,8 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> /// example: /// /// ``` - /// use textwrap::{HyphenSplitter, NoHyphenation, Options}; + /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation}; + /// use textwrap::Options; /// // The default type returned by `new`: /// let opt: Options<_, _, HyphenSplitter> = Options::new(80); /// // Setting a different splitter changes the type @@ -665,7 +672,8 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> /// with a two column margin to the left and the right: /// /// ```no_run -/// use textwrap::{termwidth, NoHyphenation, Options}; +/// use textwrap::{termwidth, Options}; +/// use textwrap::word_splitters::NoHyphenation; /// /// let width = termwidth() - 4; // Two columns on each side. 
/// let options = Options::new(width) @@ -716,8 +724,8 @@ pub fn termwidth() -> usize { pub fn fill<'a, WrapAlgo, WordSep, WordSplit, Opt>(text: &str, width_or_options: Opt) -> String where WrapAlgo: wrap_algorithms::WrapAlgorithm, - WordSep: WordSeparator, - WordSplit: WordSplitter, + WordSep: word_separators::WordSeparator, + WordSplit: word_splitters::WordSplitter, Opt: Into>, { // This will avoid reallocation in simple cases (no @@ -784,7 +792,7 @@ pub fn unfill( text: &str, ) -> ( String, - Options<'_, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), HyphenSplitter>, + Options<'_, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), word_splitters::HyphenSplitter>, ) { let trimmed = text.trim_end_matches('\n'); let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/']; @@ -886,8 +894,8 @@ pub fn refill<'a, WrapAlgo, WordSep, WordSplit, Opt>( ) -> String where WrapAlgo: wrap_algorithms::WrapAlgorithm, - WordSep: WordSeparator, - WordSplit: WordSplitter, + WordSep: word_separators::WordSeparator, + WordSplit: word_splitters::WordSplitter, Opt: Into>, { let trimmed = filled_text.trim_end_matches('\n'); @@ -1075,8 +1083,8 @@ pub fn wrap<'a, WrapAlgo, WordSep, WordSplit, Opt>( ) -> Vec> where WrapAlgo: wrap_algorithms::WrapAlgorithm, - WordSep: WordSeparator, - WordSplit: WordSplitter, + WordSep: word_separators::WordSeparator, + WordSplit: word_splitters::WordSplitter, Opt: Into>, { let options = width_or_options.into(); @@ -1091,7 +1099,7 @@ where let mut lines = Vec::new(); for line in text.split('\n') { let words = options.word_separator.find_words(line); - let split_words = core::split_words(words, &options.splitter); + let split_words = word_splitters::split_words(words, &options.splitter); let broken_words = if options.break_words { let mut broken_words = core::break_words(split_words, subsequent_width); if !options.initial_indent.is_empty() { @@ -1227,8 +1235,8 @@ pub fn wrap_columns<'a, WrapAlgo, WordSep, WordSplit, Opt>( ) -> Vec where WrapAlgo: 
wrap_algorithms::WrapAlgorithm, - WordSep: WordSeparator, - WordSplit: WordSplitter, + WordSep: word_separators::WordSeparator, + WordSplit: word_splitters::WordSplitter, Opt: Into>, { assert!(columns > 0); @@ -1288,17 +1296,17 @@ where /// [`fill`] with these options: /// /// ``` -/// # use textwrap::{core, AsciiSpace, Options, NoHyphenation}; -/// # use textwrap::wrap_algorithms; +/// # use textwrap::{core, Options}; +/// # use textwrap::{word_separators, word_splitters, wrap_algorithms}; /// # let width = 80; /// Options { /// width: width, /// initial_indent: "", /// subsequent_indent: "", /// break_words: false, -/// word_separator: AsciiSpace, +/// word_separator: word_separators::AsciiSpace, /// wrap_algorithm: wrap_algorithms::FirstFit, -/// splitter: NoHyphenation, +/// splitter: word_splitters::NoHyphenation, /// }; /// ``` /// @@ -1328,11 +1336,14 @@ where /// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs) /// for details. pub fn fill_inplace(text: &mut String, width: usize) { + use word_separators::WordSeparator; let mut indices = Vec::new(); let mut offset = 0; for line in text.split('\n') { - let words = AsciiSpace.find_words(line).collect::>(); + let words = word_separators::AsciiSpace + .find_words(line) + .collect::>(); let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width]); let mut line_offset = offset; @@ -1363,7 +1374,8 @@ pub fn fill_inplace(text: &mut String, width: usize) { #[cfg(test)] mod tests { use super::*; - use crate::wrap_algorithms; + use crate::word_splitters::WordSplitter; + use crate::{word_splitters, wrap_algorithms}; #[cfg(feature = "hyphenation")] use hyphenation::{Language, Load, Standard}; @@ -1460,7 +1472,7 @@ mod tests { fn issue_129() { // The dash is an em-dash which takes up four bytes. We used // to panic since we tried to index into the character. 
- let options = Options::new(1).word_separator(AsciiSpace); + let options = Options::new(1).word_separator(word_separators::AsciiSpace); assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]); } @@ -1471,7 +1483,7 @@ mod tests { assert_eq!( wrap( "Hello, World!", - Options::new(15).word_separator(AsciiSpace) + Options::new(15).word_separator(word_separators::AsciiSpace) ), vec!["Hello,", "World!"] ); @@ -1482,7 +1494,7 @@ mod tests { assert_eq!( wrap( "Hello, World!", - Options::new(15).word_separator(UnicodeBreakProperties) + Options::new(15).word_separator(word_separators::UnicodeBreakProperties) ), vec!["Hello, W", "orld!"] ); @@ -1614,25 +1626,27 @@ mod tests { #[test] fn simple_hyphens_static() { - let options = Options::new(8).splitter(HyphenSplitter); + let options = Options::new(8).splitter(word_splitters::HyphenSplitter); assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]); } #[test] fn simple_hyphens_dynamic() { - let options: Options<_, _> = Options::new(8).splitter(Box::new(HyphenSplitter)); + let options: Options<_, _> = + Options::new(8).splitter(Box::new(word_splitters::HyphenSplitter)); assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]); } #[test] fn no_hyphenation_static() { - let options = Options::new(8).splitter(NoHyphenation); + let options = Options::new(8).splitter(word_splitters::NoHyphenation); assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); } #[test] fn no_hyphenation_dynamic() { - let options: Options<_, _> = Options::new(8).splitter(Box::new(NoHyphenation)); + let options: Options<_, _> = + Options::new(8).splitter(Box::new(word_splitters::NoHyphenation)); assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); } @@ -1657,8 +1671,8 @@ mod tests { #[cfg(feature = "hyphenation")] fn auto_hyphenation_double_hyphenation_dynamic() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); - let mut options: Options<_, _, Box> = - 
Options::new(10).splitter(Box::new(HyphenSplitter)); + let mut options: Options<_, _, Box> = + Options::new(10).splitter(Box::new(word_splitters::HyphenSplitter)); assert_eq!( wrap("Internationalization", &options), vec!["Internatio", "nalization"] @@ -1747,7 +1761,7 @@ mod tests { fn break_words_wide_characters() { // Even the poor man's version of `ch_width` counts these // characters as wide. - let options = Options::new(5).word_separator(AsciiSpace); + let options = Options::new(5).word_separator(word_separators::AsciiSpace); assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]); } @@ -1838,8 +1852,11 @@ mod tests { #[cfg(not(feature = "smawk"))] #[cfg(not(feature = "unicode-linebreak"))] fn cloning_works() { - static OPT: Options = - Options::with_splitter(80, HyphenSplitter); + static OPT: Options< + wrap_algorithms::FirstFit, + word_separators::AsciiSpace, + word_splitters::HyphenSplitter, + > = Options::with_splitter(80, word_splitters::HyphenSplitter); #[allow(clippy::clone_on_copy)] let opt = OPT.clone(); assert_eq!(opt.width, 80); @@ -1983,22 +2000,37 @@ mod tests { #[test] fn trait_object_vec() { // Create a vector of Options containing trait-objects. 
- let mut vector: Vec, Box>> = Vec::new(); + let mut vector: Vec< + Options< + _, + Box, + Box, + >, + > = Vec::new(); // Expected result from each options let mut results = Vec::new(); - let opt_full_type: Options<_, Box, Box> = + let opt_full_type: Options< + _, + Box, + Box, + > = Options::new(10) - .splitter(Box::new(HyphenSplitter) as Box) - .word_separator(Box::new(AsciiSpace) as Box); + .splitter(Box::new(word_splitters::HyphenSplitter) + as Box) + .word_separator(Box::new(word_separators::AsciiSpace) + as Box); vector.push(opt_full_type); results.push(vec!["over-", "caffinated"]); - // Actually: Options, Box> - let opt_abbreviated_type = Options::new(10) - .break_words(false) - .splitter(Box::new(NoHyphenation) as Box) - .word_separator(Box::new(AsciiSpace) as Box); + // Actually: Options, Box> + let opt_abbreviated_type = + Options::new(10) + .break_words(false) + .splitter(Box::new(word_splitters::NoHyphenation) + as Box) + .word_separator(Box::new(word_separators::AsciiSpace) + as Box); vector.push(opt_abbreviated_type); results.push(vec!["over-caffinated"]); @@ -2006,8 +2038,9 @@ mod tests { { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let opt_hyp = Options::new(8) - .splitter(Box::new(dictionary) as Box) - .word_separator(Box::new(AsciiSpace) as Box); + .splitter(Box::new(dictionary) as Box) + .word_separator(Box::new(word_separators::AsciiSpace) + as Box); vector.push(opt_hyp); results.push(vec!["over-", "caffi-", "nated"]); } diff --git a/src/word_separator.rs b/src/word_separators.rs similarity index 90% rename from src/word_separator.rs rename to src/word_separators.rs index 9c5887f5..cb1b8a9c 100644 --- a/src/word_separator.rs +++ b/src/word_separators.rs @@ -1,4 +1,18 @@ -//! Line breaking functionality. +//! Functionality for finding words. +//! +//! In order to wrap text, we need to know where the legal break +//! points are, i.e., where the words of the text are. This means that +//! 
we need to define what a "word" is. +//! +//! A simple approach is to simply split the text on whitespace, but +//! this does not work for East-Asian languages such as Chinese or +//! Japanese where there are no spaces between words. Breaking a long +//! sequence of emojis is another example where line breaks might be +//! wanted even if there is no whitespace to be found. +//! +//! The [`WordSeparator`] trait is responsible for determining where +//! the words are in a line of text. Please refer to the trait and +//! the structs which implement it for more information. #[cfg(feature = "unicode-linebreak")] use crate::core::skip_ansi_escape_sequence; @@ -12,14 +26,15 @@ use crate::core::Word; /// breaking algorithm, which finds break points in non-ASCII text. /// /// The line breaks occur between words, please see the -/// [`WordSplitter`](crate::WordSplitter) trait for options of how -/// to handle hyphenation of individual words. +/// [`WordSplitter`](crate::word_splitters::WordSplitter) trait for +/// options of how to handle hyphenation of individual words. /// /// # Examples /// /// ``` -/// use textwrap::{WordSeparator, AsciiSpace}; /// use textwrap::core::Word; +/// use textwrap::word_separators::{WordSeparator, AsciiSpace}; +/// /// let words = AsciiSpace.find_words("Hello World!").collect::>(); /// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]); /// ``` @@ -35,6 +50,7 @@ pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug { // `Clone` for `Box`. This in used in the // `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a, // WrapAlgo, WordSep, WordSplit>` implementation. 
+#[doc(hidden)] pub trait WordSeparatorClone { fn clone_box(&self) -> Box<dyn WordSeparator>; } @@ -69,7 +85,7 @@ pub struct AsciiSpace; /// /// ``` /// use textwrap::core::Word; -/// use textwrap::{AsciiSpace, WordSeparator}; +/// use textwrap::word_separators::{AsciiSpace, WordSeparator}; /// /// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>(); /// assert_eq!(words, vec![Word::from("Hello "), @@ -123,8 +139,8 @@ pub struct UnicodeBreakProperties; /// to break lines. There is a small difference in that the U+002D /// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break: /// to allow a line break at a hyphen, use the -/// [`HyphenSplitter`](super::HyphenSplitter). Soft hyphens are not -/// currently supported. +/// [`HyphenSplitter`](crate::word_splitters::HyphenSplitter). Soft +/// hyphens are not currently supported. /// /// # Examples /// @@ -134,7 +150,7 @@ pub struct UnicodeBreakProperties; /// /// ``` /// #[cfg(feature = "unicode-linebreak")] { -/// use textwrap::{WordSeparator, UnicodeBreakProperties}; +/// use textwrap::word_separators::{WordSeparator, UnicodeBreakProperties}; /// use textwrap::core::Word; /// /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(), @@ -154,7 +170,7 @@ pub struct UnicodeBreakProperties; /// /// ``` /// #[cfg(feature = "unicode-linebreak")] { -/// use textwrap::{UnicodeBreakProperties, WordSeparator}; +/// use textwrap::word_separators::{UnicodeBreakProperties, WordSeparator}; /// use textwrap::core::Word; /// /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(), @@ -168,7 +184,7 @@ pub struct UnicodeBreakProperties; /// /// ``` /// #[cfg(feature = "unicode-linebreak")] { -/// use textwrap::{UnicodeBreakProperties, WordSeparator}; +/// use textwrap::word_separators::{UnicodeBreakProperties, WordSeparator}; /// use textwrap::core::Word; /// /// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(), diff --git a/src/splitting.rs 
b/src/word_splitters.rs similarity index 53% rename from src/splitting.rs rename to src/word_splitters.rs index 01d65a41..87cc7188 100644 --- a/src/splitting.rs +++ b/src/word_splitters.rs @@ -7,6 +7,8 @@ use std::ops::Deref; +use crate::core::{display_width, Word}; + /// The `WordSplitter` trait describes where words can be split. /// /// If the textwrap crate has been compiled with the `hyphenation` @@ -46,7 +48,7 @@ pub trait WordSplitter: WordSplitterClone + std::fmt::Debug { /// # Examples /// /// ``` - /// use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter}; + /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter}; /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]); /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]); /// ``` @@ -57,6 +59,7 @@ pub trait WordSplitter: WordSplitterClone + std::fmt::Debug { // `Clone` for `Box`. This in used in the // `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a, // WrapAlgo, WordSep, WordSplit>` implementation. +#[doc(hidden)] pub trait WordSplitterClone { fn clone_box(&self) -> Box; } @@ -83,7 +86,8 @@ impl WordSplitter for Box { /// hyphenation: /// /// ``` -/// use textwrap::{wrap, NoHyphenation, Options}; +/// use textwrap::{wrap, Options}; +/// use textwrap::word_splitters::NoHyphenation; /// /// let options = Options::new(8).splitter(NoHyphenation); /// assert_eq!(wrap("foo bar-baz", &options), @@ -153,3 +157,155 @@ impl WordSplitter for hyphenation::Standard { self.hyphenate(word).breaks } } + +/// Split words into smaller words according to the split points given +/// by `word_splitter`. +/// +/// Note that we split all words, regardless of their length. This is +/// to more cleanly separate the business of splitting (including +/// automatic hyphenation) from the business of word wrapping. 
+/// +/// # Examples +/// +/// ``` +/// use textwrap::core::Word; +/// use textwrap::word_splitters::{split_words, NoHyphenation, HyphenSplitter}; +/// +/// assert_eq!( +/// split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(), +/// vec![Word::from("foo-"), Word::from("bar")] +/// ); +/// +/// // The NoHyphenation splitter ignores the '-': +/// assert_eq!( +/// split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(), +/// vec![Word::from("foo-bar")] +/// ); +/// ``` +pub fn split_words<'a, I, WordSplit>( + words: I, + word_splitter: &'a WordSplit, +) -> impl Iterator<Item = Word<'a>> +where + I: IntoIterator<Item = Word<'a>>, + WordSplit: WordSplitter, +{ + words.into_iter().flat_map(move |word| { + let mut prev = 0; + let mut split_points = word_splitter.split_points(&word).into_iter(); + std::iter::from_fn(move || { + if let Some(idx) = split_points.next() { + let need_hyphen = !word[..idx].ends_with('-'); + let w = Word { + word: &word.word[prev..idx], + width: display_width(&word[prev..idx]), + whitespace: "", + penalty: if need_hyphen { "-" } else { "" }, + }; + prev = idx; + return Some(w); + } + + if prev < word.word.len() || prev == 0 { + let w = Word { + word: &word.word[prev..], + width: display_width(&word[prev..]), + whitespace: word.whitespace, + penalty: word.penalty, + }; + prev = word.word.len() + 1; + return Some(w); + } + + None + }) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Like assert_eq!, but the left expression is an iterator. + macro_rules! 
assert_iter_eq { + ($left:expr, $right:expr) => { + assert_eq!($left.collect::<Vec<_>>(), $right); + }; + } + + #[test] + fn split_words_no_words() { + assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]); + } + + #[test] + fn split_words_empty_word() { + assert_iter_eq!( + split_words(vec![Word::from(" ")], &HyphenSplitter), + vec![Word::from(" ")] + ); + } + + #[test] + fn split_words_single_word() { + assert_iter_eq!( + split_words(vec![Word::from("foobar")], &HyphenSplitter), + vec![Word::from("foobar")] + ); + } + + #[test] + fn split_words_hyphen_splitter() { + assert_iter_eq!( + split_words(vec![Word::from("foo-bar")], &HyphenSplitter), + vec![Word::from("foo-"), Word::from("bar")] + ); + } + + #[test] + fn split_words_adds_penalty() { + #[derive(Clone, Debug)] + struct FixedSplitPoint; + impl WordSplitter for FixedSplitPoint { + fn split_points(&self, _: &str) -> Vec<usize> { + vec![3] + } + } + + assert_iter_eq!( + split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint), + vec![ + Word { + word: "foo", + width: 3, + whitespace: "", + penalty: "-" + }, + Word { + word: "bar", + width: 3, + whitespace: "", + penalty: "" + } + ] + ); + + assert_iter_eq!( + split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint), + vec![ + Word { + word: "fo-", + width: 3, + whitespace: "", + penalty: "" + }, + Word { + word: "bar", + width: 3, + whitespace: "", + penalty: "" + } + ] + ); + } +} diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index 0b103935..368ef2a4 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -108,7 +108,7 @@ impl WrapAlgorithm for FirstFit { /// ``` /// use textwrap::core::Word; /// use textwrap::wrap_algorithms; -/// use textwrap::{AsciiSpace, WordSeparator}; +/// use textwrap::word_separators::{AsciiSpace, WordSeparator}; /// /// // Helper to convert wrapped lines to a Vec<String>. 
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec { diff --git a/tests/traits.rs b/tests/traits.rs index dfc52b9e..489269df 100644 --- a/tests/traits.rs +++ b/tests/traits.rs @@ -1,14 +1,11 @@ +use textwrap::word_separators::{AsciiSpace, WordSeparator}; +use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter}; use textwrap::wrap_algorithms::{FirstFit, WrapAlgorithm}; use textwrap::Options; -use textwrap::{AsciiSpace, WordSeparator}; -use textwrap::{NoHyphenation, WordSplitter}; /// Cleaned up type name. fn type_name(_val: &T) -> String { - std::any::type_name::() - .replace("alloc::boxed::Box", "Box") - .replace("textwrap::word_separator", "textwrap") - .replace("textwrap::splitting", "textwrap") + std::any::type_name::().replace("alloc::boxed::Box", "Box") } #[test] @@ -22,20 +19,20 @@ fn static_hyphensplitter() { format!( "textwrap::Options<{}, {}, {}>", "textwrap::wrap_algorithms::FirstFit", - "textwrap::AsciiSpace", - "textwrap::HyphenSplitter" + "textwrap::word_separators::AsciiSpace", + "textwrap::word_splitters::HyphenSplitter" ) ); // Inferring part of the type. 
- let options: Options<_, _, textwrap::HyphenSplitter> = Options::new(10); + let options: Options<_, _, HyphenSplitter> = Options::new(10); assert_eq!( type_name(&options), format!( "textwrap::Options<{}, {}, {}>", "textwrap::wrap_algorithms::FirstFit", - "textwrap::AsciiSpace", - "textwrap::HyphenSplitter" + "textwrap::word_separators::AsciiSpace", + "textwrap::word_splitters::HyphenSplitter" ) ); @@ -46,8 +43,8 @@ fn static_hyphensplitter() { format!( "textwrap::Options<{}, {}, {}>", "textwrap::wrap_algorithms::FirstFit", - "textwrap::AsciiSpace", - "textwrap::HyphenSplitter" + "textwrap::word_separators::AsciiSpace", + "textwrap::word_splitters::HyphenSplitter" ) ); } @@ -64,8 +61,8 @@ fn box_static_nohyphenation() { format!( "textwrap::Options<{}, {}, {}>", "Box", - "Box", - "Box" + "Box", + "Box" ) ); } @@ -75,15 +72,15 @@ fn box_dyn_wordsplitter() { // Inferred dynamic type due to default type parameter. let options = Options::new(10) .wrap_algorithm(Box::new(FirstFit) as Box) - .splitter(Box::new(NoHyphenation) as Box) + .splitter(Box::new(HyphenSplitter) as Box) .word_separator(Box::new(AsciiSpace) as Box); assert_eq!( type_name(&options), format!( "textwrap::Options<{}, {}, {}>", "Box", - "Box", - "Box" + "Box", + "Box" ) ); }