diff --git a/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart b/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart
index 7e4bd7710b33..768f844a1756 100644
--- a/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart
+++ b/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart
@@ -13,15 +13,51 @@ const _desc = r'Use of angle brackets in a doc comment is treated as HTML by '
'Markdown.';
const _details = r'''
-**DO** reference only in-scope identifiers in doc comments.
+**DON'T** use angle-bracketed text, `<…>`, in a doc comment unless you want to
+write an HTML tag or link.
-When a developer writes a reference with angle brackets within a doc comment,
-the angle brackets are interpreted as HTML. The text within pairs of opening and
-closing angle brackets generally get swallowed by the browser, and will not be
-displayed.
+Markdown allows HTML tags as part of the Markdown code, so you can write, for
+example, `T<sub>1</sub>`. Markdown does not restrict the allowed tags; it just
+includes the tags verbatim in the output.
-You can use a code block or code span to wrap the text containing angle
-brackets. You can also replace `<` with `&lt;` and `>` with `&gt;`.
+Dartdoc only allows some known and valid HTML tags, and will omit any disallowed
+HTML tag from the output. See the list of allowed tags and directives below.
+Your doc comment should not contain any HTML tags that are not on this list.
+
+Markdown also allows you to write an "auto-link" to a URL, as for example
+`<https://example.com/page.html>`, delimited only by `<...>`. Such a link is
+allowed by Dartdoc as well.
+A `<...>` delimited text is an auto-link if it is a valid absolute URL, starting
+with a scheme of at least two characters followed by a colon, like
+`<mailto:mr_example@example.com>`.
+
+Any other occurrence of `<word...>` or `</word...>` is likely a mistake
+and this lint will warn about it.
+If something looks like an HTML tag, meaning it starts with `<` or `</`
+and then a letter, and it has a later matching `>`, then it's considered an
+invalid HTML tag unless it is an auto-link, or it starts with an *allowed*
+HTML tag.
+
+Such a mistake can, for example, happen if writing Dart code with type arguments
+outside of a code span, for example `The type List<int> is ...`, where `<int>`
+looks like an HTML tag. Missing the end quote of a code span can have the same
+effect: ``The type `List<int> is ...`` will also treat `<int>` as an HTML tag.
+
+Allowed HTML directives are: HTML comments, `<!-- text -->`, processing
+instructions, `<?...?>`, CDATA-sections, `<[CDATA...]>`, and the allowed HTML
+tags are:
+`a`, `abbr`, `address`, `area`, `article`, `aside`, `audio`, `b`,
+`bdi`, `bdo`, `blockquote`, `br`, `button`, `canvas`, `caption`,
+`cite`, `code`, `col`, `colgroup`, `data`, `datalist`, `dd`, `del`,
+`dfn`, `div`, `dl`, `dt`, `em`, `fieldset`, `figcaption`, `figure`,
+`footer`, `form`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `header`, `hr`,
+`i`, `iframe`, `img`, `input`, `ins`, `kbd`, `keygen`, `label`,
+`legend`, `li`, `link`, `main`, `map`, `mark`, `meta`, `meter`, `nav`,
+`noscript`, `object`, `ol`, `optgroup`, `option`, `output`, `p`,
+`param`, `pre`, `progress`, `q`, `s`, `samp`, `script`, `section`,
+`select`, `small`, `source`, `span`, `strong`, `style`, `sub`, `sup`,
+`table`, `tbody`, `td`, `template`, `textarea`, `tfoot`, `th`, `thead`,
+`time`, `title`, `tr`, `track`, `u`, `ul`, `var`, `video` and `wbr`.
**BAD:**
```dart
@@ -172,21 +208,6 @@ class _UnintendedTag {
}
class _Visitor extends SimpleAstVisitor {
- // Matches autolinks: starting angle bracket, starting alphabetic character,
- // any alphabetic character or `-`, `+`, `.`, a semi-colon with optionally two
- // `/`s then anything but whitespace until a closing angle bracket.
- static final _autoLinkPattern =
- RegExp(r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>');
-
- // Matches codespans: starting backtick with anything but a backtick until a
- // closing backtick.
- static final _codeSpanPattern = RegExp(r'`([^`]+)`');
-
- // Matches unintential tags: starting `>`, optionally an opening `/` then one
- // or more valid tag characters then anything but a `>` until a closing `>`.
- static final _nonHtmlPattern =
- RegExp("<(?!/?(${_validHtmlTags.join("|")})[>])[^>]*[>]");
-
final LintRule rule;
_Visitor(this.rule);
@@ -215,18 +236,50 @@ class _Visitor extends SimpleAstVisitor {
/// Finds tags that are not valid HTML tags, not contained in a code span, and
/// are not autolinks.
List<_UnintendedTag> _findUnintendedHtmlTags(String text) {
- var codeSpanOrAutoLink = [
- ..._codeSpanPattern.allMatches(text),
- ..._autoLinkPattern.allMatches(text)
- ];
- var unintendedHtmlTags = _nonHtmlPattern.allMatches(text);
+ var markdownTokenPattern = RegExp(
+ // Escaped Markdown character.
+ r'\\.'
+
+ // Or code span, from "`"*N to "`"*N or just the start if it's
+ // unterminated, to avoid "```a``" matching the "``a``".
+ // The ```-sequence is atomic.
+ r'|(?`+)(?:[^]*?\k)?'
+
+ // Or autolink, start with scheme + `:`.
+ r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
+
+ // Or HTML comments.
+ r'|)'
+
+ // Or HTML declarations.
+ r'|'
+
+ // Or HTML processing instructions.
+ r'|<\?[^]*?\?>'
+
+ // Or HTML CDATA sections.
+ r'|<\[CDATA[^]*\]>'
+
+ // Or valid HTML tag.
+ // Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
+ // and `</validTag ...>`.
+ r'|<(?/?)(?:'
+ '${_validHtmlTags.join('|')}'
+ r')'
+ r'(?:/(?=\k)>|>|[\x20\r\n\t][^]*?>)'
+
+ // Or any of the following matches which are considered invalid tags.
+ // If the "nh" capture group is participating, one of these matched.
+ r'|(?)(?:'
+
+ // Any other `</?tag ...>` sequence.
+ r'?[a-z][^]*?>'
+ r')', caseSensitive: false);
var matches = <_UnintendedTag>[];
- for (var htmlTag in unintendedHtmlTags) {
- // If the tag is in a code span or is an autolink, we won't report it.
- if (!codeSpanOrAutoLink.any((match) =>
- match.start <= htmlTag.start && htmlTag.end <= match.end)) {
- matches.add(_UnintendedTag(htmlTag.start, htmlTag.end - htmlTag.start));
+ for (var match in markdownTokenPattern.allMatches(text)) {
+ if (match.namedGroup('nh') != null) {
+ matches.add(_UnintendedTag(match.start, match.end - match.start));
}
}
return matches;
diff --git a/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart b/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart
index 249aa32de94c..09bb8c9fff6d 100644
--- a/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart
+++ b/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart
@@ -52,6 +52,23 @@ class C {}
''');
}
+ test_codeSpan_backSlashEscaped() async {
+ await assertDiagnostics(r'''
+/// \\\`List `
+class C {}
+''', [
+ lint(12, 5), //
+ lint(18, 5), //
+ ]);
+ }
+
+ test_codeSpan_multiple() async {
+ await assertNoDiagnostics(r'''
+/// `<` or `>`
+class C {}
+''');
+ }
+
test_hangingAngleBracket_left() async {
await assertNoDiagnostics(r'''
/// n < 12
@@ -66,6 +83,34 @@ class C {}
''');
}
+ test_html_cData() async {
+ await assertNoDiagnostics(r'''
+/// <[CDATA[aaa]]>
+class C {}
+''');
+ }
+
+ test_html_comment() async {
+ await assertNoDiagnostics(r'''
+///
+class C {}
+''');
+ }
+
+ test_html_declaration() async {
+ await assertNoDiagnostics(r'''
+///
+class C {}
+''');
+ }
+
+ test_html_processingInstruction() async {
+ await assertNoDiagnostics(r'''
+///
+class C {}
+''');
+ }
+
test_notDocComment() async {
await assertNoDiagnostics(r'''
// List
@@ -73,6 +118,13 @@ class C {}
''');
}
+ test_notHtml_space() async {
+ await assertNoDiagnostics(r'''
+/// n < 0 || n > 512
+class C {}
+''');
+ }
+
test_unintendedHtml() async {
await assertDiagnostics(r'''
/// Text List.
@@ -153,15 +205,6 @@ class C {}
]);
}
- test_unintendedHtml_notIdentifier() async {
- await assertDiagnostics(r'''
-/// n < 0 || n > 512
-class C {}
-''', [
- lint(6, 10), // < 0 || n >
- ]);
- }
-
test_unintendedHtml_reference() async {
await assertDiagnostics(r'''
/// Text [List].