Addressing PR feedback.

Lukasz Anforowicz · Lukasz Anforowicz · commit 6bbbe5c7c1e0 · 2022-10-13T20:17:59.000Z
diff --git a/src/comment.rs b/src/comment.rs
@@ -432,14 +432,18 @@ impl CodeBlockAttribute {
 
 /// Block that is formatted as an item.
 ///
-/// An item starts with either a star `*`, a dash `-`, a greater-than `>`,
-/// or a number `12.` or `34)` (with at most 2 digits).
+/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, or a number `12.` or
+/// `34)` (with at most 2 digits). An item represents CommonMark's ["list
+/// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
+/// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
+/// CommonMark is recognized - see the doc comment of `ItemizedBlock::get_marker_length` for more
+/// details.
 ///
 /// Different level of indentation are handled by shrinking the shape accordingly.
 struct ItemizedBlock {
     /// the lines that are identified as part of an itemized block
     lines: Vec<String>,
-    /// the number of characters (typically whitespaces) up to the item sigil
+    /// the number of characters (typically whitespaces) up to the item marker
     indent: usize,
     /// the string that marks the start of an item
     opener: String,
@@ -448,12 +452,33 @@ struct ItemizedBlock {
 }
 
 impl ItemizedBlock {
-    /// Returns the sigil's (e.g. "- ", "* ", or "1. ") length or None if there is no sigil.
-    fn get_sigil_length(trimmed: &str) -> Option<usize> {
+    /// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
+    /// marker. Returns the length of the marker if one is present. Note that the length includes
+    /// the whitespace that follows the marker, for example the marker in `"* list item"` has the
+    /// length of 2.
+    ///
+    /// This function recognizes item markers that correspond to CommonMark's
+    /// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
+    /// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
+    /// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
+    ///
+    /// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
+    /// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
+    /// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
+    /// arbitrary numbers as markers. See also
+    /// https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990 which gives the
+    /// following example where a number (i.e. "1868") doesn't signify an ordered list:
+    /// >  The Captain died in
+    /// >  1868. He was buried in...
+    fn get_marker_length(trimmed: &str) -> Option<usize> {
+        // https://spec.commonmark.org/0.30/#bullet-list-marker or
+        // https://spec.commonmark.org/0.30/#block-quote-marker
         if trimmed.starts_with("* ") || trimmed.starts_with("- ") || trimmed.starts_with("> ") {
             return Some(2);
         }
 
+        // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
+        // allowed.
         for suffix in [". ", ") "] {
             if let Some((prefix, _)) = trimmed.split_once(suffix) {
                 if prefix.len() <= 2 && prefix.chars().all(|c| char::is_ascii_digit(&c)) {
@@ -462,32 +487,31 @@ impl ItemizedBlock {
             }
         }
 
-        None
+        None // No markers found.
     }
 
     /// Creates a new ItemizedBlock described with the given `line`
     /// or None if `line` doesn't start an item.
     fn new(line: &str) -> Option<ItemizedBlock> {
-        ItemizedBlock::get_sigil_length(line.trim_start()).map(|sigil_length| {
-            let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
-            let mut indent = space_to_sigil + sigil_length;
-            let mut line_start = " ".repeat(indent);
-
-            // Markdown blockquote start with a "> "
-            if line.trim_start().starts_with(">") {
-                // remove the original +2 indent because there might be multiple nested block quotes
-                // and it's easier to reason about the final indent by just taking the length
-                // of the new line_start. We update the indent because it effects the max width
-                // of each formatted line.
-                line_start = itemized_block_quote_start(line, line_start, 2);
-                indent = line_start.len();
-            }
-            ItemizedBlock {
-                lines: vec![line[indent..].to_string()],
-                indent,
-                opener: line[..indent].to_string(),
-                line_start,
-            }
+        let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
+        let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
+        let mut indent = space_to_marker + marker_length;
+        let mut line_start = " ".repeat(indent);
+
+        // Markdown blockquotes start with a "> "
+        if line.trim_start().starts_with(">") {
+            // remove the original +2 indent because there might be multiple nested block quotes
+            // and it's easier to reason about the final indent by just taking the length
+            // of the new line_start. We update the indent because it affects the max width
+            // of each formatted line.
+            line_start = itemized_block_quote_start(line, line_start, 2);
+            indent = line_start.len();
+        }
+        Some(ItemizedBlock {
+            lines: vec![line[indent..].to_string()],
+            indent,
+            opener: line[..indent].to_string(),
+            line_start,
         })
     }
 
@@ -507,7 +531,7 @@ impl ItemizedBlock {
     /// Returns `true` if the line is part of the current itemized block.
     /// If it is, then it is added to the internal lines list.
     fn add_line(&mut self, line: &str) -> bool {
-        if ItemizedBlock::get_sigil_length(line.trim_start()).is_none()
+        if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
             && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
         {
             self.lines.push(line.to_string());
@@ -2058,27 +2082,30 @@ fn main() {
 
         run_test("1. foo", "foo", 3, "1. ", "   ");
         run_test("12. foo", "foo", 4, "12. ", "    ");
+        run_test("1) foo", "foo", 3, "1) ", "   ");
+        run_test("12) foo", "foo", 4, "12) ", "    ");
 
         run_test("    - foo", "foo", 6, "    - ", "      ");
     }
 
     #[test]
-    fn test_itemized_block_nonobvious_sigils_are_rejected() {
+    fn test_itemized_block_nonobvious_markers_are_rejected() {
         let test_inputs = vec![
-            // Non-numeric sigils (e.g. `a.` or `iv.`) are not supported, because of a risk of
-            // misidentifying regular words as sigils.  See also the discussion in
-            // https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
+            // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
+            // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
+            // them would risk misidentifying regular words as item markers. See also the
+            // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
             "word.  rest of the paragraph.",
             "a.  maybe this is a list item?  maybe not?",
             "iv.  maybe this is a list item?  maybe not?",
-            // Numbers with 3 or more digits are not recognized as sigils, to avoid
+            // Numbers with 3 or more digits are not recognized as item markers, to avoid
             // formatting the following example as a list:
             //
             // ```
             // The Captain died in
-            // 1868.  He was buried in...
+            // 1868. He was buried in...
             // ```
-            "123.  only 2-digit numbers are recognized as sigils.",
+            "123.  only 2-digit numbers are recognized as item markers.",
             // Parens.
             "123)  giving some coverage to parens as well.",
             "a)  giving some coverage to parens as well.",