From 0a285a8f758daa05ee5f30479a0f66e15a8a2c7a Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sun, 23 Jan 2022 01:17:25 +0300 Subject: [PATCH] Fix bug 27 of #17340 Fixes silent disappearance of Markdown (pseudo-)link when it's detected as unsafe protocol. Now it will be converted to plain text in spirit of [the specification](https://spec.commonmark.org/0.30/#links). For that sake the check for protocol is added to rst.nim also. --- lib/packages/docutils/rst.nim | 66 +++++++++++++++++++++++--------- lib/packages/docutils/rstgen.nim | 17 ++------ tests/stdlib/trst.nim | 27 ++++++++++--- tests/stdlib/trstgen.nim | 11 +++++- 4 files changed, 81 insertions(+), 40 deletions(-) diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 93c2231dc63de..d4bfae48fc670 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -222,7 +222,7 @@ import os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils, - std/private/miscdollars, tables + std/private/miscdollars, tables, strscans from highlite import SourceLanguage, getSourceLanguage type @@ -1347,7 +1347,20 @@ proc match(p: RstParser, start: int, expr: string): bool = inc i result = true -proc fixupEmbeddedRef(n, a, b: PRstNode) = +proc safeProtocol*(linkStr: var string): string = + # Returns link's protocol and, if it's not safe, clears `linkStr` + result = "" + if scanf(linkStr, "$w:", result): + # if it has a protocol at all, ensure that it's not 'javascript:' or worse: + if cmpIgnoreCase(result, "http") == 0 or + cmpIgnoreCase(result, "https") == 0 or + cmpIgnoreCase(result, "ftp") == 0: + discard "it's fine" + else: + linkStr = "" + +proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool = + # Returns `true` if the link belongs to an allowed protocol var sep = - 1 for i in countdown(n.len - 2, 0): if n.sons[i].text == "<": @@ -1355,7 +1368,15 @@ proc fixupEmbeddedRef(n, a, b: PRstNode) = break var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1 for i in countup(0, sep - incr): a.add(n.sons[i]) - for i in countup(sep + 1, n.len - 2): b.add(n.sons[i]) + var linkStr = "" + for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes) + if linkStr != "": + let protocol = safeProtocol(linkStr) + result = linkStr != "" + if not result: + rstMessage(p, mwBrokenLink, protocol, + p.tok[p.idx-3].line, p.tok[p.idx-3].col) + b.add newLeaf(linkStr) proc whichRole(p: RstParser, sym: string): RstNodeKind = result = whichRoleAux(sym) @@ -1407,14 +1428,17 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode = if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">": var a = newRstNode(rnInner) var b = newRstNode(rnInner) - fixupEmbeddedRef(n, a, b) - if a.len == 0: - newKind = rnStandaloneHyperlink - newSons = @[b] - else: - newKind = rnHyperlink - newSons = @[a, b] - setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias) + if fixupEmbeddedRef(p, n, a, b): + if a.len == 0: # e.g. ``_ + newKind = rnStandaloneHyperlink + newSons = @[b] + else: # e.g. `link title `_ + newKind = rnHyperlink + newSons = @[a, b] + setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias) + else: # include as plain text, not a link + newKind = rnInner + newSons = n.sons result = newRstNode(newKind, newSons) else: # some link that will be resolved in `resolveSubs` newKind = rnRef @@ -1623,7 +1647,6 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode = result.add(lb) proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = - result = true var desc, link = "" var i = p.idx @@ -1642,14 +1665,21 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = parse("]", desc) if p.tok[i].symbol != "(": return false + let linkIdx = i + 1 parse(")", link) - let child = newRstNode(rnHyperlink) - child.add desc - child.add link # only commit if we detected no syntax error: - father.add child - p.idx = i - result = true + let protocol = safeProtocol(link) + if link == "": + result = false + rstMessage(p, mwBrokenLink, protocol, + p.tok[linkIdx].line, p.tok[linkIdx].col) + else: + let child = newRstNode(rnHyperlink) + child.add desc + child.add link + father.add child + p.idx = i + result = true proc getFootnoteType(label: PRstNode): (FootnoteType, int) = if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index d2180cb91b19e..d662a667c0ce0 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -40,7 +40,7 @@ ## can be done by simply searching for [footnoteName]. import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils, - algorithm, parseutils, std/strbasics, strscans + algorithm, parseutils, std/strbasics import ../../std/private/since @@ -826,17 +826,6 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) = "\\rstov$4[$5]{$3}$2\n", [$n.level, rstnodeToRefname(n).idS, tmp, $chr(n.level - 1 + ord('A')), tocName]) - -proc safeProtocol(linkStr: var string) = - var protocol = "" - if scanf(linkStr, "$w:", protocol): - # if it has a protocol at all, ensure that it's not 'javascript:' or worse: - if cmpIgnoreCase(protocol, "http") == 0 or cmpIgnoreCase(protocol, "https") == 0 or - cmpIgnoreCase(protocol, "ftp") == 0: - discard "it's fine" - else: - linkStr = "" - proc renderTocEntry(d: PDoc, e: TocEntry, result: var string) = dispA(d.target, result, "
  • $2
  • \n", @@ -901,7 +890,7 @@ proc renderImage(d: PDoc, n: PRstNode, result: var string) = # support for `:target:` links for images: var target = esc(d.target, getFieldValue(n, "target").strip(), escMode=emUrl) - safeProtocol(target) + discard safeProtocol(target) if target.len > 0: # `htmlOut` needs to be of the following format for link to work for images: @@ -1205,7 +1194,7 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string, d.escMode = emUrl renderRstToOut(d, link, linkStr) d.escMode = mode - safeProtocol(linkStr) + discard safeProtocol(linkStr) var textStr = "" renderRstToOut(d, text, textStr) let nimDocStr = if nimdoc: " nimdoc" else: "" diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim index 771e024773504..8b98d1403076d 100644 --- a/tests/stdlib/trst.nim +++ b/tests/stdlib/trst.nim @@ -843,12 +843,7 @@ suite "Warnings": rnInner rnLeaf 'foo' rnInner - rnLeaf '#' - rnLeaf 'foo' - rnLeaf ',' - rnLeaf 'string' - rnLeaf ',' - rnLeaf 'string' + rnLeaf '#foo,string,string' rnParagraph anchor='foo' rnLeaf 'Paragraph' rnLeaf '.' @@ -1256,3 +1251,23 @@ suite "RST inline markup": rnLeaf 'my {link example' rnLeaf 'http://example.com/bracket_(symbol_[)' """) + + test "not a Markdown link": + # bug #17340 (27) `f` will be considered as a protocol and blocked as unsafe + var warnings = new seq[string] + check("[T](f: var Foo)".toAst(warnings = warnings) == + dedent""" + rnInner + rnLeaf '[' + rnLeaf 'T' + rnLeaf ']' + rnLeaf '(' + rnLeaf 'f' + rnLeaf ':' + rnLeaf ' ' + rnLeaf 'var' + rnLeaf ' ' + rnLeaf 'Foo' + rnLeaf ')' + """) + check(warnings[] == @["input(1, 5) Warning: broken link 'f'"]) diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim index 995c0a1519042..2c28fe506281a 100644 --- a/tests/stdlib/trstgen.nim +++ b/tests/stdlib/trstgen.nim @@ -1593,8 +1593,15 @@ suite "invalid targets": test "invalid links": check("(([Nim](https://nim-lang.org/)))".toHtml == """((Nim))""") - check("(([Nim](javascript://nim-lang.org/)))".toHtml == - """((Nim))""") + # unknown protocol is treated just like plain text, not a link + var warnings = new seq[string] + check("(([Nim](javascript://nim-lang.org/)))".toHtml(warnings=warnings) == + """(([Nim](javascript://nim-lang.org/)))""") + check(warnings[] == @["input(1, 9) Warning: broken link 'javascript'"]) + warnings[].setLen 0 + check("`Nim `_".toHtml(warnings=warnings) == + """Nim <javascript://nim-lang.org/>""") + check(warnings[] == @["input(1, 33) Warning: broken link 'javascript'"]) suite "local file inclusion": test "cannot include files in sandboxed mode":