diff --git a/Cargo.lock b/Cargo.lock index 4e0e72d34153f..69f96bcbe63b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,9 +103,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.60" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c794e162a5eff65c72ef524dfe393eb923c354e350bb78b9c7383df13f3bc142" +checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602" [[package]] name = "array_tool" @@ -1362,9 +1362,9 @@ dependencies = [ [[package]] name = "fs-err" -version = "2.5.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcd1163ae48bda72a20ae26d66a04d3094135cadab911cff418ae5e33f253431" +checksum = "64db3e262960f0662f43a6366788d5f10f7f244b8f7d7d987f560baf5ded5c50" [[package]] name = "fs_extra" @@ -1891,6 +1891,16 @@ dependencies = [ "shlex", ] +[[package]] +name = "jsondoclint" +version = "0.1.0" +dependencies = [ + "anyhow", + "fs-err", + "rustdoc-json-types", + "serde_json", +] + [[package]] name = "jsonpath_lib" version = "0.2.6" @@ -4445,9 +4455,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.83" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" dependencies = [ "indexmap", "itoa", diff --git a/Cargo.toml b/Cargo.toml index 5753730053f4c..e49fe5e2f6356 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "src/tools/unicode-table-generator", "src/tools/expand-yaml-anchors", "src/tools/jsondocck", + "src/tools/jsondoclint", "src/tools/html-checker", "src/tools/bump-stage0", "src/tools/replace-version-placeholder", diff --git a/src/bootstrap/test.rs b/src/bootstrap/test.rs index f61c95830858f..9d286ddd6d164 100644 --- a/src/bootstrap/test.rs +++ b/src/bootstrap/test.rs @@ -1341,6 +1341,8 @@ note: if you're sure you want to do this, please open an issue as to why. In the let json_compiler = compiler.with_stage(0); cmd.arg("--jsondocck-path") .arg(builder.ensure(tool::JsonDocCk { compiler: json_compiler, target })); + cmd.arg("--jsondoclint-path") + .arg(builder.ensure(tool::JsonDocLint { compiler: json_compiler, target })); } if mode == "run-make" { diff --git a/src/bootstrap/tool.rs b/src/bootstrap/tool.rs index c3b04a9bbce3f..7d4ed24b64850 100644 --- a/src/bootstrap/tool.rs +++ b/src/bootstrap/tool.rs @@ -376,6 +376,7 @@ bootstrap_tool!( ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors"; LintDocs, "src/tools/lint-docs", "lint-docs"; JsonDocCk, "src/tools/jsondocck", "jsondocck"; + JsonDocLint, "src/tools/jsondoclint", "jsondoclint"; HtmlChecker, "src/tools/html-checker", "html-checker"; BumpStage0, "src/tools/bump-stage0", "bump-stage0"; ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder"; diff --git a/src/etc/check_missing_items.py b/src/etc/check_missing_items.py deleted file mode 100644 index 0026c4cbdca2c..0000000000000 --- a/src/etc/check_missing_items.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python - -# This test ensures that every ID in the produced json actually resolves to an item either in -# `index` or `paths`. It DOES NOT check that the structure of the produced json is actually in -# any way correct, for example an empty map would pass. - -# FIXME: Better error output - -import sys -import json - -crate = json.load(open(sys.argv[1], encoding="utf-8")) - - -def get_local_item(item_id): - if item_id in crate["index"]: - return crate["index"][item_id] - print("Missing local ID:", item_id) - sys.exit(1) - - -# local IDs have to be in `index`, external ones can sometimes be in `index` but otherwise have -# to be in `paths` -def valid_id(item_id): - return item_id in crate["index"] or item_id[0] != "0" and item_id in crate["paths"] - - -def check_generics(generics): - for param in generics["params"]: - check_generic_param(param) - for where_predicate in generics["where_predicates"]: - if "bound_predicate" in where_predicate: - pred = where_predicate["bound_predicate"] - check_type(pred["type"]) - for bound in pred["bounds"]: - check_generic_bound(bound) - elif "region_predicate" in where_predicate: - pred = where_predicate["region_predicate"] - for bound in pred["bounds"]: - check_generic_bound(bound) - elif "eq_predicate" in where_predicate: - pred = where_predicate["eq_predicate"] - check_type(pred["rhs"]) - check_type(pred["lhs"]) - - -def check_generic_param(param): - if "type" in param["kind"]: - ty = param["kind"]["type"] - if ty["default"]: - check_type(ty["default"]) - for bound in ty["bounds"]: - check_generic_bound(bound) - elif "const" in param["kind"]: - check_type(param["kind"]["const"]) - - -def check_generic_bound(bound): - if "trait_bound" in bound: - for param in bound["trait_bound"]["generic_params"]: - check_generic_param(param) - check_path(bound["trait_bound"]["trait"]) - - -def check_decl(decl): - for (_name, ty) in decl["inputs"]: - check_type(ty) - if decl["output"]: - check_type(decl["output"]) - -def check_path(path): - args = path["args"] - if args: - if "angle_bracketed" in args: - for arg in args["angle_bracketed"]["args"]: - if "type" in arg: - check_type(arg["type"]) - elif "const" in arg: - check_type(arg["const"]["type"]) - for binding in args["angle_bracketed"]["bindings"]: - if "equality" in binding["binding"]: - term = binding["binding"]["equality"] - if "type" in term: check_type(term["type"]) - elif "const" in term: check_type(term["const"]) - elif "constraint" in binding["binding"]: - for bound in binding["binding"]["constraint"]: - check_generic_bound(bound) - elif "parenthesized" in args: - for input_ty in args["parenthesized"]["inputs"]: - check_type(input_ty) - if args["parenthesized"]["output"]: - check_type(args["parenthesized"]["output"]) - - if path["id"] in crate["index"]: - work_list.add(path["id"]) - elif path["id"] not in crate["paths"]: - print("Id not in index or paths:", path["id"]) - sys.exit(1) - -def check_type(ty): - if ty["kind"] == "resolved_path": - check_path(ty["inner"]) - elif ty["kind"] == "tuple": - for ty in ty["inner"]: - check_type(ty) - elif ty["kind"] == "slice": - check_type(ty["inner"]) - elif ty["kind"] == "impl_trait": - for bound in ty["inner"]: - check_generic_bound(bound) - elif ty["kind"] in ("raw_pointer", "borrowed_ref", "array"): - check_type(ty["inner"]["type"]) - elif ty["kind"] == "function_pointer": - for param in ty["inner"]["generic_params"]: - check_generic_param(param) - check_decl(ty["inner"]["decl"]) - elif ty["kind"] == "qualified_path": - check_type(ty["inner"]["self_type"]) - check_path(ty["inner"]["trait"]) - - -work_list = set([crate["root"]]) -visited = work_list.copy() - -while work_list: - current = work_list.pop() - visited.add(current) - item = get_local_item(current) - # check intradoc links - for (_name, link) in item["links"].items(): - if not valid_id(link): - print("Intra-doc link contains invalid ID:", link) - - # check all fields that reference types such as generics as well as nested items - # (modules, structs, traits, and enums) - if item["kind"] == "module": - work_list |= set(item["inner"]["items"]) - visited - elif item["kind"] == "struct": - check_generics(item["inner"]["generics"]) - work_list |= set(item["inner"]["impls"]) - visited - if "tuple" in item["inner"]["kind"]: - work_list |= set(filter(None, item["inner"]["kind"]["tuple"])) - visited - elif "plain" in item["inner"]["kind"]: - work_list |= set(item["inner"]["kind"]["plain"]["fields"]) - visited - elif item["kind"] == "struct_field": - check_type(item["inner"]) - elif item["kind"] == "enum": - check_generics(item["inner"]["generics"]) - work_list |= ( - set(item["inner"]["variants"]) | set(item["inner"]["impls"]) - ) - visited - elif item["kind"] == "variant": - if item["inner"]["variant_kind"] == "tuple": - for field_id in filter(None, item["inner"]["variant_inner"]): - work_list.add(field_id) - elif item["inner"]["variant_kind"] == "struct": - work_list |= set(item["inner"]["variant_inner"]["fields"]) - visited - elif item["kind"] in ("function", "method"): - check_generics(item["inner"]["generics"]) - check_decl(item["inner"]["decl"]) - elif item["kind"] in ("static", "constant", "assoc_const"): - check_type(item["inner"]["type"]) - elif item["kind"] == "typedef": - check_type(item["inner"]["type"]) - check_generics(item["inner"]["generics"]) - elif item["kind"] == "opaque_ty": - check_generics(item["inner"]["generics"]) - for bound in item["inner"]["bounds"]: - check_generic_bound(bound) - elif item["kind"] == "trait_alias": - check_generics(item["inner"]["params"]) - for bound in item["inner"]["bounds"]: - check_generic_bound(bound) - elif item["kind"] == "trait": - check_generics(item["inner"]["generics"]) - for bound in item["inner"]["bounds"]: - check_generic_bound(bound) - work_list |= ( - set(item["inner"]["items"]) | set(item["inner"]["implementations"]) - ) - visited - elif item["kind"] == "impl": - check_generics(item["inner"]["generics"]) - if item["inner"]["trait"]: - check_path(item["inner"]["trait"]) - if item["inner"]["blanket_impl"]: - check_type(item["inner"]["blanket_impl"]) - check_type(item["inner"]["for"]) - for assoc_item in item["inner"]["items"]: - if not valid_id(assoc_item): - print("Impl block referenced a missing ID:", assoc_item) - sys.exit(1) - elif item["kind"] == "assoc_type": - for bound in item["inner"]["bounds"]: - check_generic_bound(bound) - if item["inner"]["default"]: - check_type(item["inner"]["default"]) - elif item["kind"] == "import": - if item["inner"]["id"]: - inner_id = item["inner"]["id"] - assert valid_id(inner_id) - if inner_id in crate["index"] and inner_id not in visited: - work_list.add(inner_id) diff --git a/src/test/rustdoc-json/type/extern.rs b/src/test/rustdoc-json/type/extern.rs new file mode 100644 index 0000000000000..d287d5ebec543 --- /dev/null +++ b/src/test/rustdoc-json/type/extern.rs @@ -0,0 +1,10 @@ +#![feature(extern_types)] + +extern { + /// No inner information + pub type Foo; +} + +// @is "$.index[*][?(@.docs=='No inner information')].name" '"Foo"' +// @is "$.index[*][?(@.docs=='No inner information')].kind" '"foreign_type"' +// @!has "$.index[*][?(@.docs=='No inner information')].inner" diff --git a/src/tools/compiletest/src/common.rs b/src/tools/compiletest/src/common.rs index 6f17b9e1be9a3..64df76e27720d 100644 --- a/src/tools/compiletest/src/common.rs +++ b/src/tools/compiletest/src/common.rs @@ -203,6 +203,9 @@ pub struct Config { /// The jsondocck executable. pub jsondocck_path: Option, + /// The jsondoclint executable. + pub jsondoclint_path: Option, + /// The LLVM `FileCheck` binary path. pub llvm_filecheck: Option, diff --git a/src/tools/compiletest/src/main.rs b/src/tools/compiletest/src/main.rs index 0e2cc52a645bc..38c7b87fc0d9d 100644 --- a/src/tools/compiletest/src/main.rs +++ b/src/tools/compiletest/src/main.rs @@ -64,6 +64,7 @@ pub fn parse_config(args: Vec) -> Config { .optopt("", "rust-demangler-path", "path to rust-demangler to use in tests", "PATH") .reqopt("", "python", "path to python to use for doc tests", "PATH") .optopt("", "jsondocck-path", "path to jsondocck to use for doc tests", "PATH") + .optopt("", "jsondoclint-path", "path to jsondoclint to use for doc tests", "PATH") .optopt("", "valgrind-path", "path to Valgrind executable for Valgrind tests", "PROGRAM") .optflag("", "force-valgrind", "fail if Valgrind tests cannot be run under Valgrind") .optopt("", "run-clang-based-tests-with", "path to Clang executable", "PATH") @@ -226,6 +227,7 @@ pub fn parse_config(args: Vec) -> Config { rust_demangler_path: matches.opt_str("rust-demangler-path").map(PathBuf::from), python: matches.opt_str("python").unwrap(), jsondocck_path: matches.opt_str("jsondocck-path"), + jsondoclint_path: matches.opt_str("jsondoclint-path"), valgrind_path: matches.opt_str("valgrind-path"), force_valgrind: matches.opt_present("force-valgrind"), run_clang_based_tests_with: matches.opt_str("run-clang-based-tests-with"), diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index e2afa5ef59020..8f289876f7307 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -2563,14 +2563,13 @@ impl<'test> TestCx<'test> { let mut json_out = out_dir.join(self.testpaths.file.file_stem().unwrap()); json_out.set_extension("json"); + let res = self.cmd2procres( - Command::new(&self.config.python) - .arg(root.join("src/etc/check_missing_items.py")) - .arg(&json_out), + Command::new(self.config.jsondoclint_path.as_ref().unwrap()).arg(&json_out), ); if !res.status.success() { - self.fatal_proc_rec("check_missing_items failed!", &res); + self.fatal_proc_rec("jsondoclint failed!", &res); } } diff --git a/src/tools/jsondoclint/Cargo.toml b/src/tools/jsondoclint/Cargo.toml new file mode 100644 index 0000000000000..84a6c7f96c464 --- /dev/null +++ b/src/tools/jsondoclint/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "jsondoclint" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.62" +fs-err = "2.8.1" +rustdoc-json-types = { version = "0.1.0", path = "../../rustdoc-json-types" } +serde_json = "1.0.85" diff --git a/src/tools/jsondoclint/src/item_kind.rs b/src/tools/jsondoclint/src/item_kind.rs new file mode 100644 index 0000000000000..ad8e96a0bd81d --- /dev/null +++ b/src/tools/jsondoclint/src/item_kind.rs @@ -0,0 +1,184 @@ +use rustdoc_json_types::{Item, ItemEnum, ItemKind, ItemSummary}; + +/// A univeral way to represent an [`ItemEnum`] or [`ItemKind`] +#[derive(Debug)] +pub(crate) enum Kind { + Module, + ExternCrate, + Import, + Struct, + StructField, + Union, + Enum, + Variant, + Function, + Typedef, + OpaqueTy, + Constant, + Trait, + TraitAlias, + Method, + Impl, + Static, + ForeignType, + Macro, + ProcAttribute, + ProcDerive, + AssocConst, + AssocType, + Primitive, + Keyword, + // Not in ItemKind + ProcMacro, +} + +impl Kind { + pub fn can_appear_in_mod(self) -> bool { + use Kind::*; + match self { + Module => true, + ExternCrate => true, + Import => true, + Union => true, + Struct => true, + Enum => true, + Function => true, + Trait => true, + TraitAlias => true, + Impl => true, + Typedef => true, + Constant => true, + Static => true, + Macro => true, + ProcMacro => true, + Primitive => true, + ForeignType => true, + + // FIXME(adotinthevoid): I'm not sure if these are corrent + Keyword => false, + OpaqueTy => false, + ProcAttribute => false, + ProcDerive => false, + + // Only in traits + AssocConst => false, + AssocType => false, + Method => false, + + StructField => false, // Only in structs or variants + Variant => false, // Only in enums + } + } + + pub fn can_appear_in_trait(self) -> bool { + match self { + Kind::AssocConst => true, + Kind::AssocType => true, + Kind::Method => true, + + Kind::Module => false, + Kind::ExternCrate => false, + Kind::Import => false, + Kind::Struct => false, + Kind::StructField => false, + Kind::Union => false, + Kind::Enum => false, + Kind::Variant => false, + Kind::Function => false, + Kind::Typedef => false, + Kind::OpaqueTy => false, + Kind::Constant => false, + Kind::Trait => false, + Kind::TraitAlias => false, + Kind::Impl => false, + Kind::Static => false, + Kind::ForeignType => false, + Kind::Macro => false, + Kind::ProcAttribute => false, + Kind::ProcDerive => false, + Kind::Primitive => false, + Kind::Keyword => false, + Kind::ProcMacro => false, + } + } + + pub fn is_struct_field(self) -> bool { + matches!(self, Kind::StructField) + } + pub fn is_module(self) -> bool { + matches!(self, Kind::Module) + } + pub fn is_impl(self) -> bool { + matches!(self, Kind::Impl) + } + pub fn is_variant(self) -> bool { + matches!(self, Kind::Variant) + } + pub fn is_trait(self) -> bool { + matches!(self, Kind::Trait) + } + pub fn is_struct_enum_union(self) -> bool { + matches!(self, Kind::Struct | Kind::Enum | Kind::Union) + } + + pub fn from_item(i: &Item) -> Self { + use Kind::*; + match i.inner { + ItemEnum::Module(_) => Module, + ItemEnum::Import(_) => Import, + ItemEnum::Union(_) => Union, + ItemEnum::Struct(_) => Struct, + ItemEnum::StructField(_) => StructField, + ItemEnum::Enum(_) => Enum, + ItemEnum::Variant(_) => Variant, + ItemEnum::Function(_) => Function, + ItemEnum::Trait(_) => Trait, + ItemEnum::TraitAlias(_) => TraitAlias, + ItemEnum::Method(_) => Method, + ItemEnum::Impl(_) => Impl, + ItemEnum::Typedef(_) => Typedef, + ItemEnum::OpaqueTy(_) => OpaqueTy, + ItemEnum::Constant(_) => Constant, + ItemEnum::Static(_) => Static, + ItemEnum::Macro(_) => Macro, + ItemEnum::ProcMacro(_) => ProcMacro, + // https://github.com/rust-lang/rust/issues/100961 + ItemEnum::PrimitiveType(_) => Primitive, + ItemEnum::ForeignType => ForeignType, + ItemEnum::ExternCrate { .. } => ExternCrate, + ItemEnum::AssocConst { .. } => AssocConst, + ItemEnum::AssocType { .. } => AssocType, + } + } + + pub fn from_summary(s: &ItemSummary) -> Self { + use Kind::*; + match s.kind { + ItemKind::AssocConst => AssocConst, + ItemKind::AssocType => AssocType, + ItemKind::Constant => Constant, + ItemKind::Enum => Enum, + ItemKind::ExternCrate => ExternCrate, + ItemKind::ForeignType => ForeignType, + ItemKind::Function => Function, + ItemKind::Impl => Impl, + ItemKind::Import => Import, + ItemKind::Keyword => Keyword, + ItemKind::Macro => Macro, + ItemKind::Method => Method, + ItemKind::Module => Module, + ItemKind::OpaqueTy => OpaqueTy, + ItemKind::Primitive => Primitive, + ItemKind::ProcAttribute => ProcAttribute, + ItemKind::ProcDerive => ProcDerive, + ItemKind::Static => Static, + ItemKind::Struct => Struct, + ItemKind::StructField => StructField, + ItemKind::Trait => Trait, + ItemKind::TraitAlias => TraitAlias, + ItemKind::Typedef => Typedef, + ItemKind::Union => Union, + ItemKind::Variant => Variant, + } + } +} diff --git a/src/tools/jsondoclint/src/json_find.rs b/src/tools/jsondoclint/src/json_find.rs new file mode 100644 index 0000000000000..95ea8866609d0 --- /dev/null +++ b/src/tools/jsondoclint/src/json_find.rs @@ -0,0 +1,74 @@ +use std::fmt::Write; + +use serde_json::Value; + +#[derive(Debug, Clone)] +pub enum SelectorPart { + Field(String), + Index(usize), +} + +pub type Selector = Vec; + +pub fn to_jsonpath(sel: &Selector) -> String { + let mut s = String::from("$"); + for part in sel { + match part { + SelectorPart::Field(name) => { + if is_jsonpath_safe(name) { + write!(&mut s, ".{}", name).unwrap(); + } else { + // This is probably wrong in edge cases, but all Id's are + // just ascii alphanumerics, `-` `_`, and `:` + write!(&mut s, "[{name:?}]").unwrap(); + } + } + SelectorPart::Index(idx) => write!(&mut s, "[{idx}]").unwrap(), + } + } + s +} + +fn is_jsonpath_safe(s: &str) -> bool { + s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') +} + +pub fn find_selector(haystack: &Value, needle: &Value) -> Vec { + let mut result = Vec::new(); + let mut sel = Selector::new(); + find_selector_recursive(haystack, needle, &mut result, &mut sel); + result +} + +fn find_selector_recursive( + haystack: &Value, + needle: &Value, + result: &mut Vec, + pos: &mut Selector, +) { + if needle == haystack { + result.push(pos.clone()); + // Haystack cant both contain needle and be needle + } else { + match haystack { + Value::Null => {} + Value::Bool(_) => {} + Value::Number(_) => {} + Value::String(_) => {} + Value::Array(arr) => { + for (idx, subhaystack) in arr.iter().enumerate() { + pos.push(SelectorPart::Index(idx)); + find_selector_recursive(subhaystack, needle, result, pos); + pos.pop().unwrap(); + } + } + Value::Object(obj) => { + for (key, subhaystack) in obj { + pos.push(SelectorPart::Field(key.clone())); + find_selector_recursive(subhaystack, needle, result, pos); + pos.pop().unwrap(); + } + } + } + } +} diff --git a/src/tools/jsondoclint/src/main.rs b/src/tools/jsondoclint/src/main.rs new file mode 100644 index 0000000000000..70d7a82a57605 --- /dev/null +++ b/src/tools/jsondoclint/src/main.rs @@ -0,0 +1,64 @@ +use std::env; + +use anyhow::{anyhow, bail, Result}; +use fs_err as fs; +use rustdoc_json_types::{Crate, Id, FORMAT_VERSION}; +use serde_json::Value; + +pub(crate) mod item_kind; +mod json_find; +mod validator; + +#[derive(Debug)] +struct Error { + kind: ErrorKind, + id: Id, +} + +#[derive(Debug)] +enum ErrorKind { + NotFound, + Custom(String), +} + +fn main() -> Result<()> { + let path = env::args().nth(1).ok_or_else(|| anyhow!("no path given"))?; + let contents = fs::read_to_string(&path)?; + let krate: Crate = serde_json::from_str(&contents)?; + assert_eq!(krate.format_version, FORMAT_VERSION); + + let mut validator = validator::Validator::new(&krate); + validator.check_crate(); + + if !validator.errs.is_empty() { + for err in validator.errs { + match err.kind { + ErrorKind::NotFound => { + let krate_json: Value = serde_json::from_str(&contents)?; + + let sels = + json_find::find_selector(&krate_json, &Value::String(err.id.0.clone())); + match &sels[..] { + [] => unreachable!( + "id must be in crate, or it wouldn't be reported as not found" + ), + [sel] => eprintln!( + "{} not in index or paths, but refered to at '{}'", + err.id.0, + json_find::to_jsonpath(&sel) + ), + [sel, ..] => eprintln!( + "{} not in index or paths, but refered to at '{}' and more", + err.id.0, + json_find::to_jsonpath(&sel) + ), + } + } + ErrorKind::Custom(msg) => eprintln!("{}: {}", err.id.0, msg), + } + } + bail!("Errors validating json {path}"); + } + + Ok(()) +} diff --git a/src/tools/jsondoclint/src/validator.rs b/src/tools/jsondoclint/src/validator.rs new file mode 100644 index 0000000000000..a0e77127dc2ca --- /dev/null +++ b/src/tools/jsondoclint/src/validator.rs @@ -0,0 +1,442 @@ +use std::collections::HashSet; +use std::hash::Hash; + +use rustdoc_json_types::{ + Constant, Crate, DynTrait, Enum, FnDecl, Function, FunctionPointer, GenericArg, GenericArgs, + GenericBound, GenericParamDef, Generics, Id, Impl, Import, ItemEnum, Method, Module, OpaqueTy, + Path, ProcMacro, Static, Struct, StructKind, Term, Trait, TraitAlias, Type, TypeBinding, + TypeBindingKind, Typedef, Union, Variant, WherePredicate, +}; + +use crate::{item_kind::Kind, Error, ErrorKind}; + +/// The Validator walks over the JSON tree, and ensures it is well formed. +/// It is made of several parts. +/// +/// - `check_*`: These take a type from [`rustdoc_json_types`], and check that +/// it is well formed. This involves calling `check_*` functions on +/// fields of that item, and `add_*` functions on [`Id`]s. +/// - `add_*`: These add an [`Id`] to the worklist, after validating it to check if +/// the `Id` is a kind expected in this suituation. +#[derive(Debug)] +pub struct Validator<'a> { + pub(crate) errs: Vec, + krate: &'a Crate, + /// Worklist of Ids to check. + todo: HashSet<&'a Id>, + /// Ids that have already been visited, so don't need to be checked again. + seen_ids: HashSet<&'a Id>, + /// Ids that have already been reported missing. + missing_ids: HashSet<&'a Id>, +} + +enum PathKind { + Trait, + StructEnumUnion, +} + +impl<'a> Validator<'a> { + pub fn new(krate: &'a Crate) -> Self { + Self { + krate, + errs: Vec::new(), + seen_ids: HashSet::new(), + todo: HashSet::new(), + missing_ids: HashSet::new(), + } + } + + pub fn check_crate(&mut self) { + let root = &self.krate.root; + self.add_mod_id(root); + while let Some(id) = set_remove(&mut self.todo) { + self.seen_ids.insert(id); + self.check_item(id); + } + } + + fn check_item(&mut self, id: &'a Id) { + if let Some(item) = &self.krate.index.get(id) { + match &item.inner { + ItemEnum::Import(x) => self.check_import(x), + ItemEnum::Union(x) => self.check_union(x), + ItemEnum::Struct(x) => self.check_struct(x), + ItemEnum::StructField(x) => self.check_struct_field(x), + ItemEnum::Enum(x) => self.check_enum(x), + ItemEnum::Variant(x) => self.check_variant(x, id), + ItemEnum::Function(x) => self.check_function(x), + ItemEnum::Trait(x) => self.check_trait(x), + ItemEnum::TraitAlias(x) => self.check_trait_alias(x), + ItemEnum::Method(x) => self.check_method(x), + ItemEnum::Impl(x) => self.check_impl(x), + ItemEnum::Typedef(x) => self.check_typedef(x), + ItemEnum::OpaqueTy(x) => self.check_opaque_ty(x), + ItemEnum::Constant(x) => self.check_constant(x), + ItemEnum::Static(x) => self.check_static(x), + ItemEnum::ForeignType => {} // nop + ItemEnum::Macro(x) => self.check_macro(x), + ItemEnum::ProcMacro(x) => self.check_proc_macro(x), + ItemEnum::PrimitiveType(x) => self.check_primitive_type(x), + ItemEnum::Module(x) => self.check_module(x), + // FIXME: Why don't these have their own structs? + ItemEnum::ExternCrate { .. } => {} + ItemEnum::AssocConst { type_, default: _ } => self.check_type(type_), + ItemEnum::AssocType { generics, bounds, default } => { + self.check_generics(generics); + bounds.iter().for_each(|b| self.check_generic_bound(b)); + if let Some(ty) = default { + self.check_type(ty); + } + } + } + } else { + assert!(self.krate.paths.contains_key(id)); + } + } + + // Core checkers + fn check_module(&mut self, module: &'a Module) { + module.items.iter().for_each(|i| self.add_mod_item_id(i)); + } + + fn check_import(&mut self, x: &'a Import) { + if x.glob { + self.add_mod_id(x.id.as_ref().unwrap()); + } else if let Some(id) = &x.id { + self.add_mod_item_id(id); + } + } + + fn check_union(&mut self, x: &'a Union) { + self.check_generics(&x.generics); + x.fields.iter().for_each(|i| self.add_field_id(i)); + x.impls.iter().for_each(|i| self.add_impl_id(i)); + } + + fn check_struct(&mut self, x: &'a Struct) { + self.check_generics(&x.generics); + match &x.kind { + StructKind::Unit => {} + StructKind::Tuple(fields) => fields.iter().flatten().for_each(|f| self.add_field_id(f)), + StructKind::Plain { fields, fields_stripped: _ } => { + fields.iter().for_each(|f| self.add_field_id(f)) + } + } + x.impls.iter().for_each(|i| self.add_impl_id(i)); + } + + fn check_struct_field(&mut self, x: &'a Type) { + self.check_type(x); + } + + fn check_enum(&mut self, x: &'a Enum) { + self.check_generics(&x.generics); + x.variants.iter().for_each(|i| self.add_variant_id(i)); + x.impls.iter().for_each(|i| self.add_impl_id(i)); + } + + fn check_variant(&mut self, x: &'a Variant, id: &'a Id) { + match x { + Variant::Plain(discr) => { + if let Some(discr) = discr { + if let (Err(_), Err(_)) = + (discr.value.parse::(), discr.value.parse::()) + { + self.fail( + id, + ErrorKind::Custom(format!( + "Failed to parse discriminant value `{}`", + discr.value + )), + ); + } + } + } + Variant::Tuple(tys) => tys.iter().flatten().for_each(|t| self.add_field_id(t)), + Variant::Struct { fields, fields_stripped: _ } => { + fields.iter().for_each(|f| self.add_field_id(f)) + } + } + } + + fn check_function(&mut self, x: &'a Function) { + self.check_generics(&x.generics); + self.check_fn_decl(&x.decl); + } + + fn check_trait(&mut self, x: &'a Trait) { + self.check_generics(&x.generics); + x.items.iter().for_each(|i| self.add_trait_item_id(i)); + x.bounds.iter().for_each(|i| self.check_generic_bound(i)); + x.implementations.iter().for_each(|i| self.add_impl_id(i)); + } + + fn check_trait_alias(&mut self, x: &'a TraitAlias) { + self.check_generics(&x.generics); + x.params.iter().for_each(|i| self.check_generic_bound(i)); + } + + fn check_method(&mut self, x: &'a Method) { + self.check_fn_decl(&x.decl); + self.check_generics(&x.generics); + } + + fn check_impl(&mut self, x: &'a Impl) { + self.check_generics(&x.generics); + if let Some(path) = &x.trait_ { + self.check_path(path, PathKind::Trait); + } + self.check_type(&x.for_); + x.items.iter().for_each(|i| self.add_trait_item_id(i)); + if let Some(blanket_impl) = &x.blanket_impl { + self.check_type(blanket_impl) + } + } + + fn check_typedef(&mut self, x: &'a Typedef) { + self.check_generics(&x.generics); + self.check_type(&x.type_); + } + + fn check_opaque_ty(&mut self, x: &'a OpaqueTy) { + x.bounds.iter().for_each(|b| self.check_generic_bound(b)); + self.check_generics(&x.generics); + } + + fn check_constant(&mut self, x: &'a Constant) { + self.check_type(&x.type_); + } + + fn check_static(&mut self, x: &'a Static) { + self.check_type(&x.type_); + } + + fn check_macro(&mut self, _: &'a str) { + // nop + } + + fn check_proc_macro(&mut self, _: &'a ProcMacro) { + // nop + } + + fn check_primitive_type(&mut self, _: &'a str) { + // nop + } + + fn check_generics(&mut self, x: &'a Generics) { + x.params.iter().for_each(|p| self.check_generic_param_def(p)); + x.where_predicates.iter().for_each(|w| self.check_where_predicate(w)); + } + + fn check_type(&mut self, x: &'a Type) { + match x { + Type::ResolvedPath(path) => self.check_path(path, PathKind::StructEnumUnion), + Type::DynTrait(dyn_trait) => self.check_dyn_trait(dyn_trait), + Type::Generic(_) => {} + Type::Primitive(_) => {} + Type::FunctionPointer(fp) => self.check_function_pointer(&**fp), + Type::Tuple(tys) => tys.iter().for_each(|ty| self.check_type(ty)), + Type::Slice(inner) => self.check_type(&**inner), + Type::Array { type_, len: _ } => self.check_type(&**type_), + Type::ImplTrait(bounds) => bounds.iter().for_each(|b| self.check_generic_bound(b)), + Type::Infer => {} + Type::RawPointer { mutable: _, type_ } => self.check_type(&**type_), + Type::BorrowedRef { lifetime: _, mutable: _, type_ } => self.check_type(&**type_), + Type::QualifiedPath { name: _, args, self_type, trait_ } => { + self.check_generic_args(&**args); + self.check_type(&**self_type); + self.check_path(trait_, PathKind::Trait); + } + } + } + + fn check_fn_decl(&mut self, x: &'a FnDecl) { + x.inputs.iter().for_each(|(_name, ty)| self.check_type(ty)); + if let Some(output) = &x.output { + self.check_type(output); + } + } + + fn check_generic_bound(&mut self, x: &'a GenericBound) { + match x { + GenericBound::TraitBound { trait_, generic_params, modifier: _ } => { + self.check_path(trait_, PathKind::Trait); + generic_params.iter().for_each(|gpd| self.check_generic_param_def(gpd)); + } + GenericBound::Outlives(_) => {} + } + } + + fn check_path(&mut self, x: &'a Path, kind: PathKind) { + match kind { + PathKind::Trait => self.add_trait_id(&x.id), + PathKind::StructEnumUnion => self.add_struct_enum_union_id(&x.id), + } + if let Some(args) = &x.args { + self.check_generic_args(&**args); + } + } + + fn check_generic_args(&mut self, x: &'a GenericArgs) { + match x { + GenericArgs::AngleBracketed { args, bindings } => { + args.iter().for_each(|arg| self.check_generic_arg(arg)); + bindings.iter().for_each(|bind| self.check_type_binding(bind)); + } + GenericArgs::Parenthesized { inputs, output } => { + inputs.iter().for_each(|ty| self.check_type(ty)); + if let Some(o) = output { + self.check_type(o); + } + } + } + } + + fn check_generic_param_def(&mut self, gpd: &'a GenericParamDef) { + match &gpd.kind { + rustdoc_json_types::GenericParamDefKind::Lifetime { outlives: _ } => {} + rustdoc_json_types::GenericParamDefKind::Type { bounds, default, synthetic: _ } => { + bounds.iter().for_each(|b| self.check_generic_bound(b)); + if let Some(ty) = default { + self.check_type(ty); + } + } + rustdoc_json_types::GenericParamDefKind::Const { type_, default: _ } => { + self.check_type(type_) + } + } + } + + fn check_generic_arg(&mut self, arg: &'a GenericArg) { + match arg { + GenericArg::Lifetime(_) => {} + GenericArg::Type(ty) => self.check_type(ty), + GenericArg::Const(c) => self.check_constant(c), + GenericArg::Infer => {} + } + } + + fn check_type_binding(&mut self, bind: &'a TypeBinding) { + self.check_generic_args(&bind.args); + match &bind.binding { + TypeBindingKind::Equality(term) => self.check_term(term), + TypeBindingKind::Constraint(bounds) => { + bounds.iter().for_each(|b| self.check_generic_bound(b)) + } + } + } + + fn check_term(&mut self, term: &'a Term) { + match term { + Term::Type(ty) => self.check_type(ty), + Term::Constant(con) => self.check_constant(con), + } + } + + fn check_where_predicate(&mut self, w: &'a WherePredicate) { + match w { + WherePredicate::BoundPredicate { type_, bounds, generic_params } => { + self.check_type(type_); + bounds.iter().for_each(|b| self.check_generic_bound(b)); + generic_params.iter().for_each(|gpd| self.check_generic_param_def(gpd)); + } + WherePredicate::RegionPredicate { lifetime: _, bounds } => { + bounds.iter().for_each(|b| self.check_generic_bound(b)); + } + WherePredicate::EqPredicate { lhs, rhs } => { + self.check_type(lhs); + self.check_term(rhs); + } + } + } + + fn check_dyn_trait(&mut self, dyn_trait: &'a DynTrait) { + for pt in &dyn_trait.traits { + self.check_path(&pt.trait_, PathKind::Trait); + pt.generic_params.iter().for_each(|gpd| self.check_generic_param_def(gpd)); + } + } + + fn check_function_pointer(&mut self, fp: &'a FunctionPointer) { + self.check_fn_decl(&fp.decl); + fp.generic_params.iter().for_each(|gpd| self.check_generic_param_def(gpd)); + } + + fn add_id_checked(&mut self, id: &'a Id, valid: fn(Kind) -> bool, expected: &str) { + if let Some(kind) = self.kind_of(id) { + if valid(kind) { + if !self.seen_ids.contains(id) { + self.todo.insert(id); + } + } else { + self.fail_expecting(id, expected); + } + } else { + if !self.missing_ids.contains(id) { + self.missing_ids.insert(id); + self.fail(id, ErrorKind::NotFound) + } + } + } + + fn add_field_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::is_struct_field, "StructField"); + } + + fn add_mod_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::is_module, "Module"); + } + fn add_impl_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::is_impl, "Impl"); + } + + fn add_variant_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::is_variant, "Variant"); + } + + fn add_trait_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::is_trait, "Trait"); + } + + fn add_struct_enum_union_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::is_struct_enum_union, "Struct or Enum or Union"); + } + + /// Add an Id that appeared in a trait + fn add_trait_item_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::can_appear_in_trait, "Trait inner item"); + } + + /// Add an Id that appeared in a mod + fn add_mod_item_id(&mut self, id: &'a Id) { + self.add_id_checked(id, Kind::can_appear_in_mod, "Module inner item") + } + + fn fail_expecting(&mut self, id: &Id, expected: &str) { + let kind = self.kind_of(id).unwrap(); // We know it has a kind, as it's wrong. + self.fail(id, ErrorKind::Custom(format!("Expected {expected} but found {kind:?}"))); + } + + fn fail(&mut self, id: &Id, kind: ErrorKind) { + self.errs.push(Error { id: id.clone(), kind }); + } + + fn kind_of(&mut self, id: &Id) -> Option { + if let Some(item) = self.krate.index.get(id) { + Some(Kind::from_item(item)) + } else if let Some(summary) = self.krate.paths.get(id) { + Some(Kind::from_summary(summary)) + } else { + None + } + } +} + +fn set_remove(set: &mut HashSet) -> Option { + if let Some(id) = set.iter().next() { + let id = id.clone(); + set.take(&id) + } else { + None + } +} diff --git a/triagebot.toml b/triagebot.toml index 4b2dcc246e4ee..12a55fda7ef4d 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -130,8 +130,8 @@ trigger_files = [ # Internal tooling "src/etc/htmldocck.py", - "src/etc/check_missing_items.py", "src/tools/jsondocck", + "src/tools/jsondoclint", "src/tools/rustdoc-gui", "src/tools/rustdoc-js", "src/tools/rustdoc-themes", @@ -142,11 +142,11 @@ exclude_labels = [ [autolabel."A-rustdoc-json"] trigger_files = [ - "src/etc/check_missing_items.py", "src/librustdoc/json/", "src/rustdoc-json-types", "src/test/rustdoc-json", "src/tools/jsondocck", + "src/tools/jsondoclint", ] [autolabel."T-compiler"]