diff --git a/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart b/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart index 7e4bd7710b33..768f844a1756 100644 --- a/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart +++ b/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart @@ -13,15 +13,51 @@ const _desc = r'Use of angle brackets in a doc comment is treated as HTML by ' 'Markdown.'; const _details = r''' -**DO** reference only in-scope identifiers in doc comments. +**DON'T** use angle-bracketed text, `<…>`, in a doc comment unless you want to +write an HTML tag or link. -When a developer writes a reference with angle brackets within a doc comment, -the angle brackets are interpreted as HTML. The text within pairs of opening and -closing angle brackets generally get swallowed by the browser, and will not be -displayed. +Markdown allows HTML tags as part of the Markdown code, so you can write, for +example, `T1`. Markdown does not restrict the allowed tags, it just +includes the tags verbatim in the output. -You can use a code block or code span to wrap the text containing angle -brackets. You can also replace `<` with `<` and `>` with `>`. +Dartdoc only allows some known and valid HTML tags, and will omit any disallowed +HTML tag from the output. See the list of allowed tags and directives below. +Your doc comment should not contain any HTML tags that are not on this list. + +Markdown also allows you to write an "auto-link" to an URL as for example +``, delimited only by `<...>`. Such a link is +allowed by Dartdoc as well. +A `<...>` delimited text is an auto-link if it is a valid absolute URL, starting +with a scheme of at least two characters followed by a colon, like +``. + +Any other other occurrence of `` or `` is likely a mistake +and this lint will warn about it. +If something looks like an HTML tag, meaning it starts with `<` or ``, then it's considered an +invalid HTML tag unless it is an auto-link, or it starts with an *allowed* +HTML tag. + +Such a mistake can, for example, happen if writing Dart code with type arguments +outside of a code span, for example `The type List is ...`, where `` +looks like an HTML tag. Missing the end quote of a code span can have the same +effect: ``The type `List is ...`` will also treat `` as an HTML tag. + +Allowed HTML directives are: HTML comments, ``, processing +instructions, ``, CDATA-sections, `<[CDATA...]>`, and the allowed HTML +tags are: +`a`, `abbr`, `address`, `area`, `article`, `aside`, `audio`, `b`, +`bdi`, `bdo`, `blockquote`, `br`, `button`, `canvas`, `caption`, +`cite`, `code`, `col`, `colgroup`, `data`, `datalist`, `dd`, `del`, +`dfn`, `div`, `dl`, `dt`, `em`, `fieldset`, `figcaption`, `figure`, +`footer`, `form`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `header`, `hr`, +`i`, `iframe`, `img`, `input`, `ins`, `kbd`, `keygen`, `label`, +`legend`, `li`, `link`, `main`, `map`, `mark`, `meta`, `meter`, `nav`, +`noscript`, `object`, `ol`, `optgroup`, `option`, `output`, `p`, +`param`, `pre`, `progress`, `q`, `s`, `samp`, `script`, `section`, +`select`, `small`, `source`, `span`, `strong`, `style`, `sub`, `sup`, +`table`, `tbody`, `td`, `template`, `textarea`, `tfoot`, `th`, `thead`, +`time`, `title`, `tr`, `track`, `u`, `ul`, `var`, `video` and `wbr`. **BAD:** ```dart @@ -172,21 +208,6 @@ class _UnintendedTag { } class _Visitor extends SimpleAstVisitor { - // Matches autolinks: starting angle bracket, starting alphabetic character, - // any alphabetic character or `-`, `+`, `.`, a semi-colon with optionally two - // `/`s then anything but whitespace until a closing angle bracket. - static final _autoLinkPattern = - RegExp(r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>'); - - // Matches codespans: starting backtick with anything but a backtick until a - // closing backtick. - static final _codeSpanPattern = RegExp(r'`([^`]+)`'); - - // Matches unintential tags: starting `>`, optionally an opening `/` then one - // or more valid tag characters then anything but a `>` until a closing `>`. - static final _nonHtmlPattern = - RegExp("<(?!/?(${_validHtmlTags.join("|")})[>])[^>]*[>]"); - final LintRule rule; _Visitor(this.rule); @@ -215,18 +236,50 @@ class _Visitor extends SimpleAstVisitor { /// Finds tags that are not valid HTML tags, not contained in a code span, and /// are not autolinks. List<_UnintendedTag> _findUnintendedHtmlTags(String text) { - var codeSpanOrAutoLink = [ - ..._codeSpanPattern.allMatches(text), - ..._autoLinkPattern.allMatches(text) - ]; - var unintendedHtmlTags = _nonHtmlPattern.allMatches(text); + var markdownTokenPattern = RegExp( + // Escaped Markdown character. + r'\\.' + + // Or code span, from "`"*N to "`"*N or just the start if it's + // unterminated, to avoid "```a``" matching the "``a``". + // The ```-sequence is atomic. + r'|(?`+)(?:[^]*?\k)?' + + // Or autolink, start with scheme + `:`. + r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>' + + // Or HTML comments. + r'|)' + + // Or HTML declarations. + r'|' + + // Or HTML processing instructions. + r'|<\?[^]*?\?>' + + // Or HTML CDATA sections sections. + r'|<\[CDATA[^]*\]>' + + // Or valid HTML tag. + // Matches ``, ``, ``, `` + // and `. + r'|<(?/?)(?:' + '${_validHtmlTags.join('|')}' + r')' + r'(?:/(?=\k)>|>|[\x20\r\n\t][^]*?>)' + + // Or any of the following matches which are considered invalid tags. + // If the "nh" capture group is participating, one of these matched. + r'|(?)(?:' + + // Any other `` sequence. + r'' + r')', caseSensitive: false); var matches = <_UnintendedTag>[]; - for (var htmlTag in unintendedHtmlTags) { - // If the tag is in a code span or is an autolink, we won't report it. - if (!codeSpanOrAutoLink.any((match) => - match.start <= htmlTag.start && htmlTag.end <= match.end)) { - matches.add(_UnintendedTag(htmlTag.start, htmlTag.end - htmlTag.start)); + for (var match in markdownTokenPattern.allMatches(text)) { + if (match.namedGroup('nh') != null) { + matches.add(_UnintendedTag(match.start, match.end - match.start)); } } return matches; diff --git a/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart b/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart index 249aa32de94c..09bb8c9fff6d 100644 --- a/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart +++ b/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart @@ -52,6 +52,23 @@ class C {} '''); } + test_codeSpan_backSlashEscaped() async { + await assertDiagnostics(r''' +/// \\\`List ` +class C {} +''', [ + lint(12, 5), // + lint(18, 5), // + ]); + } + + test_codeSpan_multiple() async { + await assertNoDiagnostics(r''' +/// `<` or `>` +class C {} +'''); + } + test_hangingAngleBracket_left() async { await assertNoDiagnostics(r''' /// n < 12 @@ -66,6 +83,34 @@ class C {} '''); } + test_html_cData() async { + await assertNoDiagnostics(r''' +/// <[CDATA[aaa]]> +class C {} +'''); + } + + test_html_comment() async { + await assertNoDiagnostics(r''' +/// +class C {} +'''); + } + + test_html_declaration() async { + await assertNoDiagnostics(r''' +/// +class C {} +'''); + } + + test_html_processingInstruction() async { + await assertNoDiagnostics(r''' +/// +class C {} +'''); + } + test_notDocComment() async { await assertNoDiagnostics(r''' // List @@ -73,6 +118,13 @@ class C {} '''); } + test_notHtml_space() async { + await assertNoDiagnostics(r''' +/// n < 0 || n > 512 +class C {} +'''); + } + test_unintendedHtml() async { await assertDiagnostics(r''' /// Text List. @@ -153,15 +205,6 @@ class C {} ]); } - test_unintendedHtml_notIdentifier() async { - await assertDiagnostics(r''' -/// n < 0 || n > 512 -class C {} -''', [ - lint(6, 10), // < 0 || n > - ]); - } - test_unintendedHtml_reference() async { await assertDiagnostics(r''' /// Text [List].