Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make tidy problematic const checking fast again #127455

Merged
merged 2 commits into from
Jul 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 18 additions & 21 deletions src/tools/tidy/src/style.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
// ignore-tidy-dbg

use crate::walk::{filter_dirs, walk};
use regex::RegexSet;
use rustc_hash::FxHashMap;
use std::{ffi::OsStr, path::Path, sync::LazyLock};
use std::{ffi::OsStr, path::Path};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -109,32 +110,16 @@ const ROOT_PROBLEMATIC_CONSTS: &[u32] = &[
173390526, 721077,
];

#[cfg(not(test))]
const LETTER_DIGIT: &[(char, char)] = &[('A', '4'), ('B', '8'), ('E', '3')];

#[cfg(test)]
const LETTER_DIGIT: &[(char, char)] = &[('A', '4'), ('B', '8'), ('E', '3'), ('0', 'F')]; // use "futile" F intentionally

// Returns all permutations of problematic consts, over 2000 elements.
fn generate_problematic_strings(
consts: &[u32],
letter_digit: &FxHashMap<char, char>,
) -> Vec<String> {
generate_problems(consts, letter_digit)
.flat_map(|v| vec![v.to_string(), format!("{:X}", v)])
.flat_map(|v| vec![v.to_string(), format!("{:x}", v), format!("{:X}", v)])
.collect()
}

static PROBLEMATIC_CONSTS_STRINGS: LazyLock<Vec<String>> = LazyLock::new(|| {
generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&FxHashMap::from_iter(LETTER_DIGIT.iter().copied()),
)
});

fn contains_problematic_const(trimmed: &str) -> bool {
PROBLEMATIC_CONSTS_STRINGS.iter().any(|s| trimmed.to_uppercase().contains(s))
}

const INTERNAL_COMPILER_DOCS_LINE: &str = "#### This error code is internal to the compiler and will not be emitted with normal Rust code.";

/// Parser states for `line_is_url`.
Expand Down Expand Up @@ -331,6 +316,13 @@ pub fn check(path: &Path, bad: &mut bool) {
// We only check CSS files in rustdoc.
path.extension().map_or(false, |e| e == "css") && !is_in(path, "src", "librustdoc")
}
let problematic_consts_strings = generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&[('A', '4'), ('B', '8'), ('E', '3')].iter().cloned().collect(),
);
// This creates a RegexSet as regex contains performance optimizations to be able to deal with these over
// 2000 needles efficiently. This runs over the entire source code, so performance matters.
let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap();

walk(path, skip, &mut |entry, contents| {
let file = entry.path();
Expand Down Expand Up @@ -400,6 +392,7 @@ pub fn check(path: &Path, bad: &mut bool) {
let is_test = file.components().any(|c| c.as_os_str() == "tests");
// scanning the whole file for multiple needles at once is more efficient than
// executing lines times needles separate searches.
let any_problematic_line = problematic_regex.is_match(contents);
for (i, line) in contents.split('\n').enumerate() {
if line.is_empty() {
if i == 0 {
Expand Down Expand Up @@ -469,8 +462,12 @@ pub fn check(path: &Path, bad: &mut bool) {
if trimmed.contains("//") && trimmed.contains(" XXX") {
err("Instead of XXX use FIXME")
}
if contains_problematic_const(trimmed) {
err("Don't use magic numbers that spell things (consider 0x12345678)");
if any_problematic_line {
for s in problematic_consts_strings.iter() {
if trimmed.contains(s) {
err("Don't use magic numbers that spell things (consider 0x12345678)");
}
}
}
}
// for now we just check libcore
Expand Down
19 changes: 13 additions & 6 deletions src/tools/tidy/src/style/tests.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
use super::*;

#[test]
fn test_contains_problematic_const() {
assert!(contains_problematic_const("786357")); // check with no "decimal" hex digits - converted to integer
assert!(contains_problematic_const("589701")); // check with "decimal" replacements - converted to integer
assert!(contains_problematic_const("8FF85")); // check for hex display
assert!(contains_problematic_const("8fF85")); // check for case-alternating hex display
assert!(!contains_problematic_const("1193046")); // check for non-matching value
fn test_generate_problematic_strings() {
let problematic_regex = RegexSet::new(
generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&[('A', '4'), ('B', '8'), ('E', '3'), ('0', 'F')].iter().cloned().collect(), // use "futile" F intentionally
)
.as_slice(),
)
.unwrap();
assert!(problematic_regex.is_match("786357")); // check with no "decimal" hex digits - converted to integer
assert!(problematic_regex.is_match("589701")); // check with "decimal" replacements - converted to integer
assert!(problematic_regex.is_match("8FF85")); // check for hex display
assert!(!problematic_regex.is_match("1193046")); // check for non-matching value
}
Loading