diff --git a/crates/ruff_linter/src/rules/eradicate/detection.rs b/crates/ruff_linter/src/rules/eradicate/detection.rs
index 1798df468aa4e..a5beac11af8e9 100644
--- a/crates/ruff_linter/src/rules/eradicate/detection.rs
+++ b/crates/ruff_linter/src/rules/eradicate/detection.rs
@@ -16,8 +16,43 @@ static CODE_INDICATORS: LazyLock = LazyLock::new(|| {
 
 static ALLOWLIST_REGEX: LazyLock<Regex> = LazyLock::new(|| {
     Regex::new(
-        r"^(?i)(?:pylint|pyright|noqa|nosec|region|endregion|type:\s*ignore|fmt:\s*(on|off)|isort:\s*(on|off|skip|skip_file|split|dont-add-imports(:\s*\[.*?])?)|mypy:|SPDX-License-Identifier:|language=[a-zA-Z](?: ?[-_.a-zA-Z0-9]+)+(?:\s+prefix=\S+)?(?:\s+suffix=\S+)?|(?:en)?coding[:=][ \t]*([-_.a-zA-Z0-9]+))",
-    ).unwrap()
+        r"(?x)
+        ^
+        (?:
+            # Case-sensitive
+            pyright
+            | mypy:
+            | type:\s*ignore
+            | SPDX-License-Identifier:
+            | fmt:\s*(on|off|skip)
+            | region|endregion
+
+            # Case-insensitive
+            | (?i:
+                noqa
+            )
+
+            # Unknown case sensitivity
+            | (?i:
+                pylint
+                | nosec
+                | isort:\s*(on|off|skip|skip_file|split|dont-add-imports(:\s*\[.*?])?)
+                | (?:en)?coding[:=][\x20\t]*([-_.A-Z0-9]+)
+            )
+
+            # IntelliJ language injection comments:
+            # * `language` must be lowercase.
+            # * No spaces around `=`.
+            # * Language IDs as used in comments must have no spaces,
+            #   though to IntelliJ they can be anything.
+            # * May optionally contain `prefix=` and/or `suffix=`,
+            #   not declared here since we use `.is_match()`.
+            | language=[-_.a-zA-Z0-9]+
+
+        )
+        ",
+    )
+    .unwrap()
 });
 
 static HASH_NUMBER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"#\d").unwrap());
@@ -299,17 +334,42 @@ mod tests {
 
     #[test]
     fn comment_contains_language_injection() {
-        assert!(comment_contains_code("# language=123", &[]));
+        // `language` with bad casing
+        assert!(comment_contains_code("# Language=C#", &[]));
+        assert!(comment_contains_code("# lAngUAgE=inI", &[]));
+
+        // Unreasonable language IDs, possibly literals
         assert!(comment_contains_code("# language=\"pt\"", &[]));
         assert!(comment_contains_code("# language='en'", &[]));
 
-        assert!(!comment_contains_code("# language=xml", &[]));
+        // Spaces around equal sign
+        assert!(comment_contains_code("# language =xml", &[]));
+        assert!(comment_contains_code("# language= html", &[]));
+        assert!(comment_contains_code("# language = RegExp", &[]));
+
+        // Leading whitespace
+        assert!(!comment_contains_code("#language=CSS", &[]));
+        assert!(!comment_contains_code("# \t language=C++", &[]));
+
+        // Human language false negatives
+        assert!(!comment_contains_code("# language=en", &[]));
+        assert!(!comment_contains_code("# language=en-US", &[]));
+
+        // Casing (fine because such IDs cannot be validated)
+        assert!(!comment_contains_code("# language=PytHoN", &[]));
+        assert!(!comment_contains_code("# language=jaVaScrIpt", &[]));
+
+        // Space within ID (fine because `Shell` is considered the ID)
+        assert!(!comment_contains_code("# language=Shell Script", &[]));
+
+        // With prefix and/or suffix
+        assert!(!comment_contains_code("# language=HTML prefix=", &[]));
         assert!(!comment_contains_code(
-            "# language=HTML prefix= suffix=",
+            r"# language=Requirements suffix=\n",
             &[]
         ));
         assert!(!comment_contains_code(
-            "# language=ecma script level 4",
+            "language=javascript prefix=(function(){ suffix=})()",
             &[]
         ));
     }