Skip to content

Commit

Permalink
Rollup merge of rust-lang#86059 - GuillaumeGomez:html-checker2, r=Mar…
Browse files Browse the repository at this point in the history
…k-Simulacrum

Add new tool to check HTML

Re-opening of rust-lang#84480.

r? `@Mark-Simulacrum`
  • Loading branch information
JohnTitor authored Jun 28, 2021
2 parents a435b49 + 785b705 commit b5d4343
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 5 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,13 @@ dependencies = [
"winapi 0.3.9",
]

[[package]]
name = "html-checker"
version = "0.1.0"
dependencies = [
"walkdir",
]

[[package]]
name = "html5ever"
version = "0.25.1"
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ members = [
"src/tools/unicode-table-generator",
"src/tools/expand-yaml-anchors",
"src/tools/jsondocck",
"src/tools/html-checker",
]

exclude = [
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ impl<'a> Builder<'a> {
test::RustdocTheme,
test::RustdocUi,
test::RustdocJson,
test::HtmlCheck,
// Run bootstrap close to the end as it's unlikely to fail
test::Bootstrap,
// Run run-make last, since these won't pass without make on Windows
Expand Down
4 changes: 2 additions & 2 deletions src/bootstrap/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,8 @@ impl Step for Std {

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Rustc {
stage: u32,
target: TargetSelection,
pub stage: u32,
pub target: TargetSelection,
}

impl Step for Rustc {
Expand Down
45 changes: 44 additions & 1 deletion src/bootstrap/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::fmt;
use std::fs;
use std::iter;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::process::{Command, Stdio};

use build_helper::{self, output, t};

Expand Down Expand Up @@ -161,6 +161,49 @@ You can skip linkcheck with --exclude src/tools/linkchecker"
}
}

/// Reports whether an external `tidy` executable can be launched.
///
/// Runs `tidy --version` with its standard output discarded and returns `true`
/// only if the process could be started and exited successfully. Failure to
/// spawn the process is treated the same as a failing exit status: `false`.
fn check_if_tidy_is_installed() -> bool {
    let mut probe = Command::new("tidy");
    probe.arg("--version").stdout(Stdio::null());
    match probe.status() {
        Ok(exit) => exit.success(),
        // The program could not be started at all.
        Err(_) => false,
    }
}

/// Test step that runs the `html-checker` tool over generated documentation.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct HtmlCheck {
/// Target whose documentation output directory is passed to the checker.
target: TargetSelection,
}

/// Hooks the `html-checker` tool into the build system as a test step.
impl Step for HtmlCheck {
    type Output = ();
    const DEFAULT: bool = true;
    const ONLY_HOSTS: bool = true;

    /// Selects this step for the `src/tools/html-checker` path and registers
    /// `check_if_tidy_is_installed` as its lazily evaluated default condition
    /// (presumably so the step is only picked up by default when `tidy` is
    /// available -- confirm against `ShouldRun::lazy_default_condition`).
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/html-checker")
            .lazy_default_condition(Box::new(check_if_tidy_is_installed))
    }

    /// Schedules the step for the target of the current run configuration.
    fn make_run(run: RunConfig<'_>) {
        let target = run.target;
        run.builder.ensure(HtmlCheck { target });
    }

    /// Builds the documentation and runs the HTML checker on its output directory.
    ///
    /// # Panics
    ///
    /// Panics if the external `tidy` program is not available.
    fn run(self, builder: &Builder<'_>) {
        let HtmlCheck { target } = self;

        let tidy_available = check_if_tidy_is_installed();
        if !tidy_available {
            eprintln!("not running HTML-check tool because `tidy` is missing");
            eprintln!(
                "Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed"
            );
            panic!("Cannot run html-check tests");
        }

        // The checker needs generated docs to look at: make sure the default
        // documentation and the compiler documentation have been produced first.
        builder.default_doc(&[]);
        builder.ensure(crate::doc::Rustc { stage: builder.top_stage, target });

        // Hand the documentation output directory of the target to the checker.
        try_run(
            builder,
            builder.tool_cmd(Tool::HtmlChecker).arg(builder.doc_out(target)),
        );
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Cargotest {
stage: u32,
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ bootstrap_tool!(
ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors";
LintDocs, "src/tools/lint-docs", "lint-docs";
JsonDocCk, "src/tools/jsondocck", "jsondocck";
HtmlChecker, "src/tools/html-checker", "html-checker";
);

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgl1-mesa-dev \
llvm-dev \
libfreetype6-dev \
libexpat1-dev
libexpat1-dev \
tidy

COPY scripts/sccache.sh /scripts/
RUN sh /scripts/sccache.sh
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
libssl-dev \
sudo \
xz-utils
xz-utils \
tidy

# Install dependencies for chromium browser
RUN apt-get install -y \
Expand Down
12 changes: 12 additions & 0 deletions src/tools/html-checker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "html-checker"
version = "0.1.0"
authors = ["Guillaume Gomez <guillaume1.gomez@gmail.com>"]
edition = "2018"

[[bin]]
name = "html-checker"
path = "main.rs"

[dependencies]
walkdir = "2"
93 changes: 93 additions & 0 deletions src/tools/html-checker/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use std::env;
use std::path::Path;
use std::process::{Command, Output};

/// Runs `tidy` on a single HTML file and returns the number of problems found.
///
/// The diagnostics `tidy` writes to its standard error are forwarded to this
/// process' standard error, preceded by a header naming the file and the exit
/// code.
///
/// Returns `0` when `tidy` exits successfully, or when it fails with an exit
/// code other than 2 without printing anything. Otherwise returns the number
/// of diagnostic lines, but never less than `1`: a failure that is reported
/// must also count as a failure for the caller.
///
/// # Panics
///
/// Panics if the `tidy` command cannot be run.
fn check_html_file(file: &Path) -> usize {
    let to_mute = &[
        // "disabled" on <link> or "autocomplete" on <select> emit this warning
        "PROPRIETARY_ATTRIBUTE",
        // It complains when multiple elements in the same page link to the same anchor for
        // some reason...
        "ANCHOR_NOT_UNIQUE",
        // If a <span> contains only HTML elements and no text, it complains about it.
        "TRIM_EMPTY_ELEMENT",
        // FIXME: the next three warnings are about <pre> elements which are not supposed to
        // contain HTML. The solution here would be to replace them with a <div>
        "MISSING_ENDTAG_BEFORE",
        "INSERTING_TAG",
        "DISCARDING_UNEXPECTED",
    ];
    let to_mute_s = to_mute.join(",");
    let mut command = Command::new("tidy");
    command
        .arg("-errors")
        .arg("-quiet")
        .arg("--mute-id") // this option is useful in case we want to mute more warnings
        .arg("yes")
        .arg("--mute")
        .arg(&to_mute_s)
        .arg(file);

    let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
    if status.success() {
        return 0;
    }

    // Decode lossily: the diagnostics are only displayed and counted, so a stray
    // non-UTF-8 byte in them must not abort the whole check.
    let stderr = String::from_utf8_lossy(&stderr);
    if stderr.is_empty() && status.code() != Some(2) {
        // Failing status other than 2 with nothing printed: nothing to report.
        return 0;
    }

    eprintln!(
        "=> Errors for `{}` (error code: {}) <=",
        file.display(),
        status.code().unwrap_or(-1)
    );
    eprintln!("{}", stderr);
    // Exit code 2 with an empty report still went through the reporting path above,
    // so make sure it is counted as at least one error.
    stderr.lines().count().max(1)
}

/// Names of the entries located directly inside the documentation folder that are
/// checked; `find_all_html_files` skips every other entry at that depth.
const DOCS_TO_CHECK: &[&str] =
&["alloc", "core", "proc_macro", "implementors", "src", "std", "test"];

/// Walks `dir` recursively and checks every `.html` file found with `check_html_file`.
///
/// Entries directly inside `dir` are only visited when their name is listed in
/// `DOCS_TO_CHECK`; entries at any other depth are always visited.
///
/// Returns the number of files read and the number of errors.
///
/// # Panics
///
/// Panics if an entry cannot be read while walking the directory tree.
fn find_all_html_files(dir: &Path) -> (usize, usize) {
    // Filter applied while walking: only first-level entries are restricted.
    let is_wanted = |candidate: &walkdir::DirEntry| {
        if candidate.depth() != 1 {
            return true;
        }
        match candidate.file_name().to_str() {
            Some(name) => DOCS_TO_CHECK.iter().any(|doc| *doc == name),
            // A name that is not valid UTF-8 cannot match any listed entry.
            None => false,
        }
    };

    let (mut html_files, mut error_total) = (0, 0);
    for item in walkdir::WalkDir::new(dir).into_iter().filter_entry(is_wanted) {
        let item = item.expect("failed to read file");
        if !item.file_type().is_file() {
            continue;
        }
        let path = item.path();
        let is_html = matches!(path.extension().and_then(|ext| ext.to_str()), Some("html"));
        if is_html {
            error_total += check_html_file(path);
            html_files += 1;
        }
    }
    (html_files, error_total)
}

/// Entry point: checks the HTML files of the documentation folder given as the
/// single command-line argument.
///
/// # Errors
///
/// Returns a usage message when the number of arguments is wrong, and a summary
/// message when at least one error was counted by `find_all_html_files`.
fn main() -> Result<(), String> {
    // `args_os` instead of `args`: the folder is only ever used as a path, and
    // `env::args` panics on arguments that are not valid Unicode.
    let args = env::args_os().collect::<Vec<_>>();
    if args.len() != 2 {
        // The argument list may be empty, so do not index into it blindly.
        let program = args
            .first()
            .map(|name| name.to_string_lossy().into_owned())
            .unwrap_or_else(|| String::from("html-checker"));
        return Err(format!("Usage: {} <doc folder>", program));
    }

    println!("Running HTML checker...");

    let (files_read, errors) = find_all_html_files(Path::new(&args[1]));
    println!("Done! Read {} files...", files_read);
    if errors > 0 {
        Err(format!("HTML check failed: {} errors", errors))
    } else {
        println!("No error found!");
        Ok(())
    }
}

0 comments on commit b5d4343

Please sign in to comment.