Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new tool to check HTML #84480

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1567,6 +1567,13 @@ dependencies = [
"winapi 0.3.9",
]

[[package]]
name = "html-checker"
version = "0.1.0"
dependencies = [
"walkdir",
]

[[package]]
name = "html5ever"
version = "0.25.1"
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ members = [
"src/tools/unicode-table-generator",
"src/tools/expand-yaml-anchors",
"src/tools/jsondocck",
"src/tools/html-checker",
]

exclude = [
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ impl<'a> Builder<'a> {
test::RustdocTheme,
test::RustdocUi,
test::RustdocJson,
test::HtmlCheck,
// Run bootstrap close to the end as it's unlikely to fail
test::Bootstrap,
// Run run-make last, since these won't pass without make on Windows
Expand Down
4 changes: 2 additions & 2 deletions src/bootstrap/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,8 @@ impl Step for Std {

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Rustc {
stage: u32,
target: TargetSelection,
pub stage: u32,
pub target: TargetSelection,
}

impl Step for Rustc {
Expand Down
50 changes: 49 additions & 1 deletion src/bootstrap/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::fmt;
use std::fs;
use std::iter;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::process::{Command, Stdio};

use build_helper::{self, output, t};

Expand Down Expand Up @@ -144,6 +144,54 @@ You can skip linkcheck with --exclude src/tools/linkchecker"
}
}

fn check_if_tidy_is_installed() -> bool {
Command::new("tidy")
.arg("--version")
.stdout(Stdio::null())
.status()
.map_or(false, |status| status.success())
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct HtmlCheck {
target: TargetSelection,
}

impl Step for HtmlCheck {
type Output = ();
const DEFAULT: bool = true;
const ONLY_HOSTS: bool = true;

fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
let run = run.path("src/tools/html-checker");
run.default_condition(check_if_tidy_is_installed())
}

fn make_run(run: RunConfig<'_>) {
run.builder.ensure(HtmlCheck { target: run.target });
}

fn run(self, builder: &Builder<'_>) {
if !check_if_tidy_is_installed() {
eprintln!("not running HTML-check tool because `tidy` is missing");
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
eprintln!(
"Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed"
);
panic!("Cannot run html-check tests");
}
// Ensure that a few different kinds of documentation are available.
builder.default_doc(&[]);
builder.ensure(crate::doc::Rustc { target: self.target, stage: builder.top_stage });
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved

try_run(
builder,
builder
.tool_cmd(Tool::HtmlChecker)
.arg(builder.out.join(self.target.triple).join("doc")),
);
}
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Cargotest {
stage: u32,
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ bootstrap_tool!(
ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors";
LintDocs, "src/tools/lint-docs", "lint-docs";
JsonDocCk, "src/tools/jsondocck", "jsondocck";
HtmlChecker, "src/tools/html-checker", "html-checker";
);

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgl1-mesa-dev \
llvm-dev \
libfreetype6-dev \
libexpat1-dev
libexpat1-dev \
tidy

COPY scripts/sccache.sh /scripts/
RUN sh /scripts/sccache.sh
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
libssl-dev \
sudo \
xz-utils
xz-utils \
tidy

# Install dependencies for chromium browser
RUN apt-get install -y \
Expand Down
12 changes: 12 additions & 0 deletions src/tools/html-checker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "html-checker"
version = "0.1.0"
authors = ["Guillaume Gomez <guillaume1.gomez@gmail.com>"]
edition = "2018"

[[bin]]
name = "html-checker"
path = "main.rs"

[dependencies]
walkdir = "2"
93 changes: 93 additions & 0 deletions src/tools/html-checker/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use std::env;
use std::path::Path;
use std::process::{Command, Output};

fn check_html_file(file: &Path) -> usize {
let to_mute = &[
// "disabled" on <link> or "autocomplete" on <select> emit this warning
"PROPRIETARY_ATTRIBUTE",
// It complains when multiple in the same page link to the same anchor for some reason...
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
"ANCHOR_NOT_UNIQUE",
// If a <span> contains only HTML elements and no text, it complains about it.
"TRIM_EMPTY_ELEMENT",
// FIXME: the three next warnings are about <pre> elements which are not supposed to
// contain HTML. The solution here would be to replace them with a <div>
"MISSING_ENDTAG_BEFORE",
"INSERTING_TAG",
"DISCARDING_UNEXPECTED",
];
let to_mute_s = to_mute.join(",");
let mut command = Command::new("tidy");
command
.arg("-errors")
.arg("-quiet")
.arg("--mute-id") // this option is useful in case we want to mute more warnings
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
.arg("yes")
.arg("--mute")
.arg(&to_mute_s)
.arg(file);

let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
if status.success() {
0
} else {
let stderr = String::from_utf8(stderr).expect("String::from_utf8 failed...");
if stderr.is_empty() && status.code() != Some(2) {
0
} else {
eprintln!(
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
"=> Errors for `{}` (error code: {}) <=",
file.display(),
status.code().unwrap_or(-1)
);
eprintln!("{}", stderr);
stderr.lines().count()
}
}
}

const DOCS_TO_CHECK: &[&str] =
&["alloc", "core", "proc_macro", "implementors", "src", "std", "test"];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than only checking specific crates, could you instead filter out the books you don't want to check?

Copy link
Member Author

@GuillaumeGomez GuillaumeGomez Jun 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are A LOT of different ones and I can't know for sure which one will be added. I initially filtered out but it was a huge mess. We don't generate that many crates in the end so I preferred going this way.

EDIT: and some of them don't have "book" in their name, making things funnier, like "rustdoc". :3


// Returns the number of files read and the number of errors.
fn find_all_html_files(dir: &Path) -> (usize, usize) {
let mut files_read = 0;
let mut errors = 0;

for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| {
e.depth() != 1
|| e.file_name()
.to_str()
.map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
.unwrap_or(false)
}) {
let entry = entry.expect("failed to read file");
if !entry.file_type().is_file() {
continue;
}
let entry = entry.path();
if entry.extension().and_then(|s| s.to_str()) == Some("html") {
errors += check_html_file(&entry);
files_read += 1;
}
}
(files_read, errors)
}

fn main() -> Result<(), String> {
let args = env::args().collect::<Vec<_>>();
if args.len() != 2 {
return Err(format!("Usage: {} <doc folder>", args[0]));
}

println!("Running HTML checker...");

let (files_read, errors) = find_all_html_files(&Path::new(&args[1]));
println!("Done! Read {} files...", files_read);
if errors > 0 {
Err(format!("HTML check failed: {} errors", errors))
} else {
println!("No error found!");
Ok(())
}
}