Skip to content

Commit

Permalink
Auto merge of #41992 - ollie27:linkchecker_base, r=alexcrichton
Browse files Browse the repository at this point in the history
linkchecker: Add support for <base> tag

Add support for the HTML <base> tag as used by mdBook so The Unstable
Book can be checked.

Also clean up a few things:
* Stop checking the name attribute. It should never have been used and
mdBook has since been fixed not to use it.
* Make sure we only check html files.
* Remove a few unnecessary allocations.

Finally, dead links in The Unstable Book have been fixed.
  • Loading branch information
bors committed May 15, 2017
2 parents ac254fb + d4f20eb commit 75b0568
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#23121]

[#23121]: https://github.com/rust-lang/rust/issues/23121

See also [`slice_patterns`](slice-patterns.html).
See also [`slice_patterns`](language-features/slice-patterns.html).

------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/asm.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,4 @@ constraints, etc.
[llvm-docs]: http://llvm.org/docs/LangRef.html#inline-assembler-expressions

If you need more power and don't mind losing some of the niceties of
`asm!`, check out [global_asm](global_asm.html).
`asm!`, check out [global_asm](language-features/global_asm.html).
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#29641]

[#29641]: https://github.com/rust-lang/rust/issues/29641

See also [`box_syntax`](box-syntax.html)
See also [`box_syntax`](language-features/box-syntax.html)

------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/box-syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#27779]

[#27779]: https://github.com/rust-lang/rust/issues/27779

See also [`box_patterns`](box-patterns.html)
See also [`box_patterns`](language-features/box-patterns.html)

------------------------

Expand Down
4 changes: 2 additions & 2 deletions src/doc/unstable-book/src/language-features/global_asm.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,5 @@ usages and placed the larger, single usage in the crate root.

If you don't need quite as much power and flexibility as
`global_asm!` provides, and you don't mind restricting your inline
assembly to `fn` bodies only, you might try the [asm](asm.html)
feature instead.
assembly to `fn` bodies only, you might try the
[asm](language-features/asm.html) feature instead.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ This feature is part of "compiler plugins." It will often be used with the
[`plugin`] and `rustc_private` features as well. For more details, see
their docs.

[`plugin`]: plugin.html
[`plugin`]: language-features/plugin.html

------------------------
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/plugin.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The tracking issue for this feature is: [#29597]
This feature is part of "compiler plugins." It will often be used with the
[`plugin_registrar`] and `rustc_private` features.

[`plugin_registrar`]: plugin-registrar.html
[`plugin_registrar`]: language-features/plugin-registrar.html

------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ The tracking issue for this feature is: [#23121]

[#23121]: https://github.com/rust-lang/rust/issues/23121

See also [`advanced_slice_patterns`](advanced-slice-patterns.html).
See also
[`advanced_slice_patterns`](language-features/advanced-slice-patterns.html).

------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]

[#33082]: https://github.com/rust-lang/rust/issues/33082

See also [`alloc_system`](alloc-system.html).
See also [`alloc_system`](library-features/alloc-system.html).

------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/library-features/alloc-system.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]

[#33082]: https://github.com/rust-lang/rust/issues/33082

See also [`alloc_jemalloc`](alloc-jemalloc.html).
See also [`alloc_jemalloc`](library-features/alloc-jemalloc.html).

------------------------

Expand Down
89 changes: 33 additions & 56 deletions src/tools/linkchecker/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ macro_rules! t {
}

fn main() {
let docs = env::args().nth(1).unwrap();
let docs = env::args_os().nth(1).unwrap();
let docs = env::current_dir().unwrap().join(docs);
let mut errors = false;
walk(&mut HashMap::new(), &docs, &docs, &mut errors);
Expand All @@ -65,15 +65,14 @@ enum Redirect {
struct FileEntry {
source: String,
ids: HashSet<String>,
names: HashSet<String>,
}

type Cache = HashMap<PathBuf, FileEntry>;

impl FileEntry {
fn parse_ids(&mut self, file: &Path, contents: &str, errors: &mut bool) {
if self.ids.is_empty() {
with_attrs_in_source(contents, " id", |fragment, i| {
with_attrs_in_source(contents, " id", |fragment, i, _| {
let frag = fragment.trim_left_matches("#").to_owned();
if !self.ids.insert(frag) {
*errors = true;
Expand All @@ -82,15 +81,6 @@ impl FileEntry {
});
}
}

fn parse_names(&mut self, contents: &str) {
if self.names.is_empty() {
with_attrs_in_source(contents, " name", |fragment, _| {
let frag = fragment.trim_left_matches("#").to_owned();
self.names.insert(frag);
});
}
}
}

fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
Expand All @@ -116,15 +106,8 @@ fn check(cache: &mut Cache,
file: &Path,
errors: &mut bool)
-> Option<PathBuf> {
// ignore js files as they are not prone to errors as the rest of the
// documentation is and they otherwise bring up false positives.
if file.extension().and_then(|s| s.to_str()) == Some("js") {
return None;
}

// ignore handlebars files as they use {{}} to build links, we only
// want to test the generated files
if file.extension().and_then(|s| s.to_str()) == Some("hbs") {
// Ignore non-HTML files.
if file.extension().and_then(|s| s.to_str()) != Some("html") {
return None;
}

Expand All @@ -147,13 +130,7 @@ fn check(cache: &mut Cache,
return None;
}

// mdbook uses the HTML <base> tag to handle links for subdirectories, which
// linkchecker doesn't support
if file.to_str().unwrap().contains("unstable-book") {
return None;
}

let res = load_file(cache, root, PathBuf::from(file), SkipRedirect);
let res = load_file(cache, root, file, SkipRedirect);
let (pretty_file, contents) = match res {
Ok(res) => res,
Err(_) => return None,
Expand All @@ -162,13 +139,10 @@ fn check(cache: &mut Cache,
cache.get_mut(&pretty_file)
.unwrap()
.parse_ids(&pretty_file, &contents, errors);
cache.get_mut(&pretty_file)
.unwrap()
.parse_names(&contents);
}

// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
with_attrs_in_source(&contents, " href", |url, i| {
with_attrs_in_source(&contents, " href", |url, i, base| {
// Ignore external URLs
if url.starts_with("http:") || url.starts_with("https:") ||
url.starts_with("javascript:") || url.starts_with("ftp:") ||
Expand All @@ -184,9 +158,9 @@ fn check(cache: &mut Cache,
// Once we've plucked out the URL, parse it using our base url and
// then try to extract a file path.
let mut path = file.to_path_buf();
if !url.is_empty() {
if !base.is_empty() || !url.is_empty() {
path.pop();
for part in Path::new(url).components() {
for part in Path::new(base).join(url).components() {
match part {
Component::Prefix(_) |
Component::RootDir => panic!(),
Expand All @@ -197,13 +171,6 @@ fn check(cache: &mut Cache,
}
}

if let Some(extension) = path.extension() {
// don't check these files
if extension == "png" {
return;
}
}

// Alright, if we've found a file name then this file had better
// exist! If it doesn't then we register and print an error.
if path.exists() {
Expand All @@ -218,11 +185,17 @@ fn check(cache: &mut Cache,
pretty_path.display());
return;
}
let res = load_file(cache, root, path.clone(), FromRedirect(false));
if let Some(extension) = path.extension() {
// Ignore non-HTML files.
if extension != "html" {
return;
}
}
let res = load_file(cache, root, &path, FromRedirect(false));
let (pretty_path, contents) = match res {
Ok(res) => res,
Err(LoadError::IOError(err)) => {
panic!(format!("error loading {}: {}", path.display(), err));
panic!("error loading {}: {}", path.display(), err);
}
Err(LoadError::BrokenRedirect(target, _)) => {
*errors = true;
Expand All @@ -245,11 +218,10 @@ fn check(cache: &mut Cache,

let entry = &mut cache.get_mut(&pretty_path).unwrap();
entry.parse_ids(&pretty_path, &contents, errors);
entry.parse_names(&contents);

if !(entry.ids.contains(*fragment) || entry.names.contains(*fragment)) {
if !entry.ids.contains(*fragment) {
*errors = true;
print!("{}:{}: broken link fragment ",
print!("{}:{}: broken link fragment ",
pretty_file.display(),
i + 1);
println!("`#{}` pointing to `{}`", fragment, pretty_path.display());
Expand All @@ -267,7 +239,7 @@ fn check(cache: &mut Cache,

fn load_file(cache: &mut Cache,
root: &Path,
mut file: PathBuf,
file: &Path,
redirect: Redirect)
-> Result<(PathBuf, String), LoadError> {
let mut contents = String::new();
Expand All @@ -279,9 +251,9 @@ fn load_file(cache: &mut Cache,
None
}
Entry::Vacant(entry) => {
let mut fp = File::open(file.clone()).map_err(|err| {
let mut fp = File::open(file).map_err(|err| {
if let FromRedirect(true) = redirect {
LoadError::BrokenRedirect(file.clone(), err)
LoadError::BrokenRedirect(file.to_path_buf(), err)
} else {
LoadError::IOError(err)
}
Expand All @@ -297,17 +269,14 @@ fn load_file(cache: &mut Cache,
entry.insert(FileEntry {
source: contents.clone(),
ids: HashSet::new(),
names: HashSet::new(),
});
}
maybe
}
};
file.pop();
match maybe_redirect.map(|url| file.join(url)) {
match maybe_redirect.map(|url| file.parent().unwrap().join(url)) {
Some(redirect_file) => {
let path = PathBuf::from(redirect_file);
load_file(cache, root, path, FromRedirect(true))
load_file(cache, root, &redirect_file, FromRedirect(true))
}
None => Ok((pretty_file, contents)),
}
Expand All @@ -329,10 +298,14 @@ fn maybe_redirect(source: &str) -> Option<String> {
})
}

fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(contents: &str, attr: &str, mut f: F) {
let mut base = "";
for (i, mut line) in contents.lines().enumerate() {
while let Some(j) = line.find(attr) {
let rest = &line[j + attr.len()..];
// The base tag should always be the first link in the document so
// we can get away with using one pass.
let is_base = line[..j].ends_with("<base");
line = rest;
let pos_equals = match rest.find("=") {
Some(i) => i,
Expand All @@ -358,7 +331,11 @@ fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f
Some(i) => &rest[..i],
None => continue,
};
f(url, i)
if is_base {
base = url;
continue;
}
f(url, i, base)
}
}
}

0 comments on commit 75b0568

Please sign in to comment.