From fc496cf39d2f43a3823a63a7f9d3fb587c64c884 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 8 Oct 2021 20:10:40 +0200 Subject: [PATCH 1/2] Switch tester to parsel --- src/etc/htmldocck.py | 111 ++++++------------------------------------- 1 file changed, 15 insertions(+), 96 deletions(-) diff --git a/src/etc/htmldocck.py b/src/etc/htmldocck.py index 8647db5a45dc8..db2f378300e92 100644 --- a/src/etc/htmldocck.py +++ b/src/etc/htmldocck.py @@ -110,72 +110,9 @@ import re import shlex from collections import namedtuple -try: - from html.parser import HTMLParser -except ImportError: - from HTMLParser import HTMLParser -try: - from xml.etree import cElementTree as ET -except ImportError: - from xml.etree import ElementTree as ET - -try: - from html.entities import name2codepoint -except ImportError: - from htmlentitydefs import name2codepoint - -# "void elements" (no closing tag) from the HTML Standard section 12.1.2 -VOID_ELEMENTS = {'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', - 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'} - -# Python 2 -> 3 compatibility -try: - unichr -except NameError: - unichr = chr - +from parsel import Selector channel = os.environ["DOC_RUST_LANG_ORG_CHANNEL"] - -class CustomHTMLParser(HTMLParser): - """simplified HTML parser. - - this is possible because we are dealing with very regular HTML from - rustdoc; we only have to deal with i) void elements and ii) empty - attributes.""" - def __init__(self, target=None): - HTMLParser.__init__(self) - self.__builder = target or ET.TreeBuilder() - - def handle_starttag(self, tag, attrs): - attrs = {k: v or '' for k, v in attrs} - self.__builder.start(tag, attrs) - if tag in VOID_ELEMENTS: - self.__builder.end(tag) - - def handle_endtag(self, tag): - self.__builder.end(tag) - - def handle_startendtag(self, tag, attrs): - attrs = {k: v or '' for k, v in attrs} - self.__builder.start(tag, attrs) - self.__builder.end(tag) - - def handle_data(self, data): - self.__builder.data(data) - - def handle_entityref(self, name): - self.__builder.data(unichr(name2codepoint[name])) - - def handle_charref(self, name): - code = int(name[1:], 16) if name.startswith(('x', 'X')) else int(name, 10) - self.__builder.data(unichr(code)) - - def close(self): - HTMLParser.close(self) - return self.__builder.close() - - Command = namedtuple('Command', 'negated cmd args lineno context') @@ -256,29 +193,11 @@ def get_commands(template): yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1, context=line) -def _flatten(node, acc): - if node.text: - acc.append(node.text) - for e in node: - _flatten(e, acc) - if e.tail: - acc.append(e.tail) - - -def flatten(node): - acc = [] - _flatten(node, acc) - return ''.join(acc) - - def normalize_xpath(path): path = path.replace("{{channel}}", channel) - if path.startswith('//'): - return '.' + path # avoid warnings - elif path.startswith('.//'): - return path - else: + if not path.startswith('//'): raise InvalidCheck('Non-absolute XPath is not supported due to implementation issues') + return path class CachedFiles(object): @@ -323,7 +242,7 @@ def get_tree(self, path): with io.open(abspath, encoding='utf-8') as f: try: - tree = ET.fromstringlist(f.readlines(), CustomHTMLParser()) + tree = Selector(text=f.read()) except Exception as e: raise RuntimeError('Cannot parse an HTML file {!r}: {}'.format(path, e)) self.trees[path] = tree @@ -351,7 +270,7 @@ def check_string(data, pat, regexp): def check_tree_attr(tree, path, attr, pat, regexp): path = normalize_xpath(path) ret = False - for e in tree.findall(path): + for e in tree.xpath(path): if attr in e.attrib: value = e.attrib[attr] else: @@ -363,19 +282,19 @@ def check_tree_attr(tree, path, attr, pat, regexp): return ret +def flatten(elem): + return ''.join(elem.css('::text').getall()) + + def check_tree_text(tree, path, pat, regexp): path = normalize_xpath(path) ret = False try: - for e in tree.findall(path): - try: - value = flatten(e) - except KeyError: - continue - else: - ret = check_string(value, pat, regexp) - if ret: - break + for e in tree.xpath(path): + value = flatten(e) + ret = check_string(value, pat, regexp) + if ret: + break except Exception: print('Failed to get path "{}"'.format(path)) raise @@ -384,7 +303,7 @@ def check_tree_text(tree, path, pat, regexp): def get_tree_count(tree, path): path = normalize_xpath(path) - return len(tree.findall(path)) + return len(tree.xpath(path)) def stderr(*args): From 44378ac71cae4dfe0df98b1e3bdfbf780d536635 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 8 Oct 2021 20:11:19 +0200 Subject: [PATCH 2/2] Fix errors in tests --- src/test/rustdoc/fn-type.rs | 7 ++++--- src/test/rustdoc/inline_cross/renamed-via-module.rs | 8 ++++---- src/test/rustdoc/intra-doc/private.rs | 6 +++--- src/test/rustdoc/primitive/no_std.rs | 4 ++-- src/test/rustdoc/proc-macro.rs | 2 +- src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs | 6 +++--- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/test/rustdoc/fn-type.rs b/src/test/rustdoc/fn-type.rs index 3959aeb6cfb7f..11811ffe307ee 100644 --- a/src/test/rustdoc/fn-type.rs +++ b/src/test/rustdoc/fn-type.rs @@ -8,6 +8,7 @@ pub struct Foo<'a, T> { pub hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32), } -// @has 'foo/struct.Foo.html' '//span[@id="structfield.generic"]' "generic: fn(val: &T) -> T" -// @has 'foo/struct.Foo.html' '//span[@id="structfield.lifetime"]' "lifetime: fn(val: &'a i32) -> i32" -// @has 'foo/struct.Foo.html' '//span[@id="structfield.hrtb_lifetime"]' "hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32)" +// @has 'foo/struct.Foo.html' +// @has - '//span[@id="structfield.generic"]' "generic: fn(val: &T) -> T" +// @has - '//span[@id="structfield.lifetime"]' "lifetime: fn(val: &'a i32) -> i32" +// @has - '//span[@id="structfield.hrtb_lifetime"]' "hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32)" diff --git a/src/test/rustdoc/inline_cross/renamed-via-module.rs b/src/test/rustdoc/inline_cross/renamed-via-module.rs index cdedbf0707985..d053d659c5228 100644 --- a/src/test/rustdoc/inline_cross/renamed-via-module.rs +++ b/src/test/rustdoc/inline_cross/renamed-via-module.rs @@ -7,16 +7,16 @@ extern crate foo; // @has foo/iter/index.html -// @has - '//a/[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy" -// @has - '//a/[@href="struct.StepBy.html"]' "StepBy" +// @has - '//a[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy" +// @has - '//a[@href="struct.StepBy.html"]' "StepBy" // @has foo/iter/struct.DeprecatedStepBy.html // @has - '//h1' "Struct foo::iter::DeprecatedStepBy" // @has foo/iter/struct.StepBy.html // @has - '//h1' "Struct foo::iter::StepBy" // @has bar/iter/index.html -// @has - '//a/[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy" -// @has - '//a/[@href="struct.StepBy.html"]' "StepBy" +// @has - '//a[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy" +// @has - '//a[@href="struct.StepBy.html"]' "StepBy" // @has bar/iter/struct.DeprecatedStepBy.html // @has - '//h1' "Struct bar::iter::DeprecatedStepBy" // @has bar/iter/struct.StepBy.html diff --git a/src/test/rustdoc/intra-doc/private.rs b/src/test/rustdoc/intra-doc/private.rs index 2756a7998e8ea..0b5882e40fcd0 100644 --- a/src/test/rustdoc/intra-doc/private.rs +++ b/src/test/rustdoc/intra-doc/private.rs @@ -4,9 +4,9 @@ // make sure to update `rustdoc-ui/intra-doc/private.rs` if you update this file /// docs [DontDocMe] [DontDocMe::f] [DontDocMe::x] -// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html"]' 'DontDocMe' -// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html#method.f"]' 'DontDocMe::f' -// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html#structfield.x"]' 'DontDocMe::x' +// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html"]' 'DontDocMe' +// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html#method.f"]' 'DontDocMe::f' +// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html#structfield.x"]' 'DontDocMe::x' pub struct DocMe; struct DontDocMe { x: usize, diff --git a/src/test/rustdoc/primitive/no_std.rs b/src/test/rustdoc/primitive/no_std.rs index f0f70cb6c1881..adcc9556f819d 100644 --- a/src/test/rustdoc/primitive/no_std.rs +++ b/src/test/rustdoc/primitive/no_std.rs @@ -2,8 +2,8 @@ #![deny(warnings)] #![deny(rustdoc::broken_intra_doc_links)] -// @has no_std/fn.foo.html '//a/[@href="{{channel}}/core/primitive.u8.html"]' 'u8' -// @has no_std/fn.foo.html '//a/[@href="{{channel}}/core/primitive.u8.html"]' 'primitive link' +// @has no_std/fn.foo.html '//a[@href="{{channel}}/core/primitive.u8.html"]' 'u8' +// @has no_std/fn.foo.html '//a[@href="{{channel}}/core/primitive.u8.html"]' 'primitive link' /// Link to [primitive link][u8] pub fn foo() -> u8 {} diff --git a/src/test/rustdoc/proc-macro.rs b/src/test/rustdoc/proc-macro.rs index f6d1f2cf91b5f..c8507a625db36 100644 --- a/src/test/rustdoc/proc-macro.rs +++ b/src/test/rustdoc/proc-macro.rs @@ -6,7 +6,7 @@ #![crate_name="some_macros"] // @has some_macros/index.html -// @has - '//a/[@href="attr.some_proc_attr.html"]' 'some_proc_attr' +// @has - '//a[@href="attr.some_proc_attr.html"]' 'some_proc_attr' //! include a link to [some_proc_macro] to make sure it works. diff --git a/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs b/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs index ad19036126760..b8133cbf2168d 100644 --- a/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs +++ b/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs @@ -8,13 +8,13 @@ pub mod internal { /// /// [name]: mod /// [other name]: crate::internal::mod - // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//*a[@href="struct.mod.html"]' 'name' - // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//*a[@href="struct.mod.html"]' 'other name' + // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//a[@href="struct.mod.html"]' 'name' + // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//a[@href="struct.mod.html"]' 'other name' pub struct B; } /// See [name]. /// /// [name]: internal::mod -// @has 'raw_ident_eliminate_r_hashtag/struct.A.html' '//*a[@href="internal/struct.mod.html"]' 'name' +// @has 'raw_ident_eliminate_r_hashtag/struct.A.html' '//a[@href="internal/struct.mod.html"]' 'name' pub struct A;