Skip to content

Commit 6bbbe5c

Browse files
author
Lukasz Anforowicz
committed
Addressing PR feedback.
1 parent d0a5804 commit 6bbbe5c

File tree

1 file changed

+61
-34
lines changed

1 file changed

+61
-34
lines changed

src/comment.rs

+61-34
Original file line numberDiff line numberDiff line change
@@ -432,14 +432,18 @@ impl CodeBlockAttribute {
432432

433433
/// Block that is formatted as an item.
434434
///
435-
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`,
436-
/// or a number `12.` or `34)` (with at most 2 digits).
435+
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, or a number `12.` or
436+
/// `34)` (with at most 2 digits). An item represents CommonMark's ["list
437+
/// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
438+
/// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
439+
/// CommonMark is recognized - see the doc comment of `ItemizedBlock::get_marker_length` for more
440+
/// details.
437441
///
438442
/// Different level of indentation are handled by shrinking the shape accordingly.
439443
struct ItemizedBlock {
440444
/// the lines that are identified as part of an itemized block
441445
lines: Vec<String>,
442-
/// the number of characters (typically whitespaces) up to the item sigil
446+
/// the number of characters (typically whitespaces) up to the item marker
443447
indent: usize,
444448
/// the string that marks the start of an item
445449
opener: String,
@@ -448,12 +452,33 @@ struct ItemizedBlock {
448452
}
449453

450454
impl ItemizedBlock {
451-
/// Returns the sigil's (e.g. "- ", "* ", or "1. ") length or None if there is no sigil.
452-
fn get_sigil_length(trimmed: &str) -> Option<usize> {
455+
/// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
456+
/// marker. Returns the length of the marker if one is present. Note that the length includes
457+
/// the whitespace that follows the marker, for example the marker in `"* list item"` has the
458+
/// length of 2.
459+
///
460+
/// This function recognizes item markers that correspond to CommonMark's
461+
/// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
462+
/// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
463+
/// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
464+
///
465+
/// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
466+
/// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
467+
/// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
468+
/// arbitrary numbers as markers. See also
469+
/// https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990 which gives the
470+
/// following example where a number (i.e. "1868") doesn't signify an ordered list:
471+
/// > The Captain died in
472+
/// > 1868. He wes buried in...
473+
fn get_marker_length(trimmed: &str) -> Option<usize> {
474+
// https://spec.commonmark.org/0.30/#bullet-list-marker or
475+
// https://spec.commonmark.org/0.30/#block-quote-marker
453476
if trimmed.starts_with("* ") || trimmed.starts_with("- ") || trimmed.starts_with("> ") {
454477
return Some(2);
455478
}
456479

480+
// https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
481+
// allowed.
457482
for suffix in [". ", ") "] {
458483
if let Some((prefix, _)) = trimmed.split_once(suffix) {
459484
if prefix.len() <= 2 && prefix.chars().all(|c| char::is_ascii_digit(&c)) {
@@ -462,32 +487,31 @@ impl ItemizedBlock {
462487
}
463488
}
464489

465-
None
490+
None // No markers found.
466491
}
467492

468493
/// Creates a new ItemizedBlock described with the given `line`
469494
/// or None if `line` doesn't start an item.
470495
fn new(line: &str) -> Option<ItemizedBlock> {
471-
ItemizedBlock::get_sigil_length(line.trim_start()).map(|sigil_length| {
472-
let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
473-
let mut indent = space_to_sigil + sigil_length;
474-
let mut line_start = " ".repeat(indent);
475-
476-
// Markdown blockquote start with a "> "
477-
if line.trim_start().starts_with(">") {
478-
// remove the original +2 indent because there might be multiple nested block quotes
479-
// and it's easier to reason about the final indent by just taking the length
480-
// of the new line_start. We update the indent because it effects the max width
481-
// of each formatted line.
482-
line_start = itemized_block_quote_start(line, line_start, 2);
483-
indent = line_start.len();
484-
}
485-
ItemizedBlock {
486-
lines: vec![line[indent..].to_string()],
487-
indent,
488-
opener: line[..indent].to_string(),
489-
line_start,
490-
}
496+
let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
497+
let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
498+
let mut indent = space_to_marker + marker_length;
499+
let mut line_start = " ".repeat(indent);
500+
501+
// Markdown blockquote start with a "> "
502+
if line.trim_start().starts_with(">") {
503+
// remove the original +2 indent because there might be multiple nested block quotes
504+
// and it's easier to reason about the final indent by just taking the length
505+
// of the new line_start. We update the indent because it effects the max width
506+
// of each formatted line.
507+
line_start = itemized_block_quote_start(line, line_start, 2);
508+
indent = line_start.len();
509+
}
510+
Some(ItemizedBlock {
511+
lines: vec![line[indent..].to_string()],
512+
indent,
513+
opener: line[..indent].to_string(),
514+
line_start,
491515
})
492516
}
493517

@@ -507,7 +531,7 @@ impl ItemizedBlock {
507531
/// Returns `true` if the line is part of the current itemized block.
508532
/// If it is, then it is added to the internal lines list.
509533
fn add_line(&mut self, line: &str) -> bool {
510-
if ItemizedBlock::get_sigil_length(line.trim_start()).is_none()
534+
if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
511535
&& self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
512536
{
513537
self.lines.push(line.to_string());
@@ -2058,27 +2082,30 @@ fn main() {
20582082

20592083
run_test("1. foo", "foo", 3, "1. ", " ");
20602084
run_test("12. foo", "foo", 4, "12. ", " ");
2085+
run_test("1) foo", "foo", 3, "1) ", " ");
2086+
run_test("12) foo", "foo", 4, "12) ", " ");
20612087

20622088
run_test(" - foo", "foo", 6, " - ", " ");
20632089
}
20642090

20652091
#[test]
2066-
fn test_itemized_block_nonobvious_sigils_are_rejected() {
2092+
fn test_itemized_block_nonobvious_markers_are_rejected() {
20672093
let test_inputs = vec![
2068-
// Non-numeric sigils (e.g. `a.` or `iv.`) are not supported, because of a risk of
2069-
// misidentifying regular words as sigils. See also the discussion in
2070-
// https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
2094+
// Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
2095+
// https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
2096+
// them would risk misidentifying regular words as item markers. See also the
2097+
// discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
20712098
"word. rest of the paragraph.",
20722099
"a. maybe this is a list item? maybe not?",
20732100
"iv. maybe this is a list item? maybe not?",
2074-
// Numbers with 3 or more digits are not recognized as sigils, to avoid
2101+
// Numbers with 3 or more digits are not recognized as item markers, to avoid
20752102
// formatting the following example as a list:
20762103
//
20772104
// ```
20782105
// The Captain died in
2079-
// 1868. He was buried in...
2106+
// 1868. He was buried in...
20802107
// ```
2081-
"123. only 2-digit numbers are recognized as sigils.",
2108+
"123. only 2-digit numbers are recognized as item markers.",
20822109
// Parens.
20832110
"123) giving some coverage to parens as well.",
20842111
"a) giving some coverage to parens as well.",

0 commit comments

Comments
 (0)