From 84ca399465be4eced934733ef3dc0b48005ba6af Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 12 Sep 2022 11:11:37 -0700 Subject: [PATCH] rustdoc: improve rustdoc HTML suggestions handling of nested generics Based on some poor suggestions produced when stablizing this lint and running it on `manformed-generics.rs` --- src/librustdoc/passes/html_tags.rs | 83 ++++++++++++++++- .../html-as-generics-no-suggestions.rs | 42 +++++++++ .../html-as-generics-no-suggestions.stderr | 48 ++++++++-- .../suggestions/html-as-generics.fixed | 40 +++++++++ .../suggestions/html-as-generics.rs | 40 +++++++++ .../suggestions/html-as-generics.stderr | 90 ++++++++++++++++++- 6 files changed, 330 insertions(+), 13 deletions(-) diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs index f3a3c853caca6..1bce24b0ad3e8 100644 --- a/src/librustdoc/passes/html_tags.rs +++ b/src/librustdoc/passes/html_tags.rs @@ -94,6 +94,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option { if current_pos == end_pos { None } else { Some(current_pos) } } +fn extract_path_forward(text: &str, start_pos: usize) -> Option { + use rustc_lexer::{is_id_continue, is_id_start}; + let mut current_pos = start_pos; + loop { + if current_pos < text.len() && text[current_pos..].starts_with("::") { + current_pos += 2; + } else { + break; + } + let mut chars = text[current_pos..].chars(); + if let Some(c) = chars.next() { + if is_id_start(c) { + current_pos += c.len_utf8(); + } else { + break; + } + } + while let Some(c) = chars.next() { + if is_id_continue(c) { + current_pos += c.len_utf8(); + } else { + break; + } + } + } + if current_pos == start_pos { None } else { Some(current_pos) } +} + fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool { // https://spec.commonmark.org/0.30/#raw-html // @@ -218,19 +246,68 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> { // If a tag looks like ``, it might actually be a generic. // We don't try to detect stuff `` because that's not valid HTML, // and we don't try to detect stuff `` because that's not valid Rust. - if let Some(Some(generics_start)) = (is_open_tag - && dox[..range.end].ends_with('>')) + let mut generics_end = range.end; + if let Some(Some(mut generics_start)) = (is_open_tag + && dox[..generics_end].ends_with('>')) .then(|| extract_path_backwards(&dox, range.start)) { + while generics_start != 0 + && generics_end < dox.len() + && dox.as_bytes()[generics_start - 1] == b'<' + && dox.as_bytes()[generics_end] == b'>' + { + generics_end += 1; + generics_start -= 1; + if let Some(new_start) = extract_path_backwards(&dox, generics_start) { + generics_start = new_start; + } + if let Some(new_end) = extract_path_forward(&dox, generics_end) { + generics_end = new_end; + } + } + if let Some(new_end) = extract_path_forward(&dox, generics_end) { + generics_end = new_end; + } let generics_sp = match super::source_span_for_markdown_range( tcx, &dox, - &(generics_start..range.end), + &(generics_start..generics_end), &item.attrs, ) { Some(sp) => sp, None => item.attr_span(tcx), }; + // Sometimes, we only extract part of a path. For example, consider this: + // + // <[u32] as IntoIter>::Item + // ^^^^^ unclosed HTML tag `u32` + // + // We don't have any code for parsing fully-qualified trait paths. + // In theory, we could add it, but doing it correctly would require + // parsing the entire path grammar, which is problematic because of + // overlap between the path grammar and Markdown. + // + // The example above shows that ambiguity. Is `[u32]` intended to be an + // intra-doc link to the u32 primitive, or is it intended to be a slice? + // + // If the below conditional were removed, we would suggest this, which is + // not what the user probably wants. + // + // <[u32] as `IntoIter`>::Item + // + // We know that the user actually wants to wrap the whole thing in a code + // block, but the only reason we know that is because `u32` does not, in + // fact, implement IntoIter. If the example looks like this: + // + // <[Vec] as IntoIter::Item + // + // The ideal fix would be significantly different. + if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<') + || (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>') + { + diag.emit(); + return; + } // multipart form is chosen here because ``Vec`` would be confusing. diag.multipart_suggestion( "try marking as source code", diff --git a/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs b/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs index 744b3071f1b81..476e3b2d43e4a 100644 --- a/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs +++ b/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs @@ -8,6 +8,48 @@ pub struct ConstGeneric; // HTML tags cannot contain commas, so no error. pub struct MultipleGenerics; +/// This <[u32] as Iterator> thing! +//~^ERROR unclosed HTML tag `Item` +// Some forms of fully-qualified path are simultaneously valid HTML tags +// with attributes. They produce an error, but no suggestion, because figuring +// out if this is valid would require parsing the entire path grammar. +// +// The important part is that we don't produce any *wrong* suggestions. +// While several other examples below are added to make sure we don't +// produce suggestions when given complex paths, this example is the actual +// reason behind not just using the real path parser. It's ambiguous: there's +// no way to locally reason out whether that `[u32]` is intended to be a slice +// or an intra-doc link. +pub struct FullyQualifiedPathsDoNotCount; + +/// This ::Iter thing! +//~^ERROR unclosed HTML tag `Vec` +// Some forms of fully-qualified path are simultaneously valid HTML tags +// with attributes. They produce an error, but no suggestion, because figuring +// out if this is valid would require parsing the entire path grammar. +pub struct FullyQualifiedPathsDoNotCount1; + +/// This Vec::Iter thing! +//~^ERROR unclosed HTML tag `Vec` +// Some forms of fully-qualified path are simultaneously valid HTML tags +// with attributes. They produce an error, but no suggestion, because figuring +// out if this is valid would require parsing the entire path grammar. +pub struct FullyQualifiedPathsDoNotCount2; + +/// This Vec thing! +//~^ERROR unclosed HTML tag `Vec` +// Some forms of fully-qualified path are simultaneously valid HTML tags +// with attributes. They produce an error, but no suggestion, because figuring +// out if this is valid would require parsing the entire path grammar. +pub struct FullyQualifiedPathsDoNotCount3; + +/// This Vec as IntoIter> thing! +//~^ERROR unclosed HTML tag `i32` +// Some forms of fully-qualified path are simultaneously valid HTML tags +// with attributes. They produce an error, but no suggestion, because figuring +// out if this is valid would require parsing the entire path grammar. +pub struct FullyQualifiedPathsDoNotCount4; + /// This Vec thing! //~^ERROR unclosed HTML tag `i32` // HTML attributes shouldn't be treated as Rust syntax, so no suggestions. diff --git a/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.stderr b/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.stderr index 832b8b2cac79a..3856a251321b2 100644 --- a/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.stderr +++ b/src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.stderr @@ -1,8 +1,8 @@ -error: unclosed HTML tag `i32` - --> $DIR/html-as-generics-no-suggestions.rs:11:13 +error: unclosed HTML tag `Item` + --> $DIR/html-as-generics-no-suggestions.rs:11:28 | -LL | /// This Vec thing! - | ^^^^ +LL | /// This <[u32] as Iterator> thing! + | ^^^^^^ | note: the lint level is defined here --> $DIR/html-as-generics-no-suggestions.rs:1:9 @@ -10,29 +10,59 @@ note: the lint level is defined here LL | #![deny(rustdoc::invalid_html_tags)] | ^^^^^^^^^^^^^^^^^^^^^^^^^^ +error: unclosed HTML tag `Vec` + --> $DIR/html-as-generics-no-suggestions.rs:25:10 + | +LL | /// This ::Iter thing! + | ^^^^ + +error: unclosed HTML tag `Vec` + --> $DIR/html-as-generics-no-suggestions.rs:32:13 + | +LL | /// This Vec::Iter thing! + | ^^^^ + +error: unclosed HTML tag `Vec` + --> $DIR/html-as-generics-no-suggestions.rs:39:13 + | +LL | /// This Vec thing! + | ^^^^ + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics-no-suggestions.rs:46:17 + | +LL | /// This Vec as IntoIter> thing! + | ^^^^^ + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics-no-suggestions.rs:53:13 + | +LL | /// This Vec thing! + | ^^^^ + error: unopened HTML tag `i32` - --> $DIR/html-as-generics-no-suggestions.rs:20:13 + --> $DIR/html-as-generics-no-suggestions.rs:62:13 | LL | /// This Vec thing! | ^^^^^^ error: unclosed HTML tag `i32` - --> $DIR/html-as-generics-no-suggestions.rs:25:13 + --> $DIR/html-as-generics-no-suggestions.rs:67:13 | LL | /// This 123 thing! | ^^^^^ error: unclosed HTML tag `i32` - --> $DIR/html-as-generics-no-suggestions.rs:30:14 + --> $DIR/html-as-generics-no-suggestions.rs:72:14 | LL | /// This Vec: thing! | ^^^^^ error: unclosed HTML tag `i32` - --> $DIR/html-as-generics-no-suggestions.rs:35:39 + --> $DIR/html-as-generics-no-suggestions.rs:77:39 | LL | /// This [link](https://rust-lang.org) thing! | ^^^^^ -error: aborting due to 5 previous errors +error: aborting due to 10 previous errors diff --git a/src/test/rustdoc-ui/suggestions/html-as-generics.fixed b/src/test/rustdoc-ui/suggestions/html-as-generics.fixed index c0a0de24c5263..07c8c9ff254bc 100644 --- a/src/test/rustdoc-ui/suggestions/html-as-generics.fixed +++ b/src/test/rustdoc-ui/suggestions/html-as-generics.fixed @@ -30,3 +30,43 @@ pub struct BareTurbofish; //~^ERROR unclosed HTML tag `i32` //~|HELP try marking as source pub struct Nested; + +/// Nested generics `Vec>` +//~^ ERROR unclosed HTML tag `u32` +//~|HELP try marking as source +pub struct NestedGenerics; + +/// Generics with path `Vec::Iter` +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct GenericsWithPath; + +/// Generics with path `>::Iter` +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPath; + +/// Generics with path `Vec>::Iter` +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPath2; + +/// Generics with bump `>`s +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithBump; + +/// Generics with bump `Vec>`s +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithBump2; + +/// Generics with punct `>`! +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPunct; + +/// Generics with punct `Vec>`! +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPunct2; diff --git a/src/test/rustdoc-ui/suggestions/html-as-generics.rs b/src/test/rustdoc-ui/suggestions/html-as-generics.rs index 0b6009b0e59c3..cdd652f397ec4 100644 --- a/src/test/rustdoc-ui/suggestions/html-as-generics.rs +++ b/src/test/rustdoc-ui/suggestions/html-as-generics.rs @@ -30,3 +30,43 @@ pub struct BareTurbofish; //~^ERROR unclosed HTML tag `i32` //~|HELP try marking as source pub struct Nested; + +/// Nested generics Vec> +//~^ ERROR unclosed HTML tag `u32` +//~|HELP try marking as source +pub struct NestedGenerics; + +/// Generics with path Vec::Iter +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct GenericsWithPath; + +/// Generics with path >::Iter +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPath; + +/// Generics with path Vec>::Iter +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPath2; + +/// Generics with bump >s +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithBump; + +/// Generics with bump Vec>s +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithBump2; + +/// Generics with punct >! +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPunct; + +/// Generics with punct Vec>! +//~^ ERROR unclosed HTML tag `i32` +//~|HELP try marking as source +pub struct NestedGenericsWithPunct2; diff --git a/src/test/rustdoc-ui/suggestions/html-as-generics.stderr b/src/test/rustdoc-ui/suggestions/html-as-generics.stderr index df54b71264ebc..211dd4210d50c 100644 --- a/src/test/rustdoc-ui/suggestions/html-as-generics.stderr +++ b/src/test/rustdoc-ui/suggestions/html-as-generics.stderr @@ -69,5 +69,93 @@ help: try marking as source code LL | /// This `Vec::` thing! | + + -error: aborting due to 6 previous errors +error: unclosed HTML tag `u32` + --> $DIR/html-as-generics.rs:34:28 + | +LL | /// Nested generics Vec> + | ^^^^^ + | +help: try marking as source code + | +LL | /// Nested generics `Vec>` + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:39:27 + | +LL | /// Generics with path Vec::Iter + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with path `Vec::Iter` + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:44:28 + | +LL | /// Generics with path >::Iter + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with path `>::Iter` + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:49:31 + | +LL | /// Generics with path Vec>::Iter + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with path `Vec>::Iter` + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:54:28 + | +LL | /// Generics with bump >s + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with bump `>`s + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:59:31 + | +LL | /// Generics with bump Vec>s + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with bump `Vec>`s + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:64:29 + | +LL | /// Generics with punct >! + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with punct `>`! + | + + + +error: unclosed HTML tag `i32` + --> $DIR/html-as-generics.rs:69:32 + | +LL | /// Generics with punct Vec>! + | ^^^^^ + | +help: try marking as source code + | +LL | /// Generics with punct `Vec>`! + | + + + +error: aborting due to 14 previous errors