From 91ae81b565da07f8711712ff4e175ed7262bc630 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Sat, 17 Feb 2024 22:31:31 +0530 Subject: [PATCH] Move `RUF001`, `RUF002` to AST checker (#9993) ## Summary Part of #7595 This PR moves the `RUF001` and `RUF002` rules to the AST checker. This removes the use of docstring detection from these rules. ## Test Plan As this is just a refactor, make sure existing test cases pass. --- .../src/checkers/ast/analyze/string_like.rs | 8 ++- crates/ruff_linter/src/checkers/tokens.rs | 33 +++--------- crates/ruff_linter/src/registry.rs | 2 - .../ruff/rules/ambiguous_unicode_character.rs | 53 +++++++++++++++++-- 4 files changed, 63 insertions(+), 33 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs b/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs index c3c8fb3367eec..5af18e67adf54 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs @@ -2,10 +2,16 @@ use ruff_python_ast::StringLike; use crate::checkers::ast::Checker; use crate::codes::Rule; -use crate::rules::{flake8_bandit, flake8_pyi}; +use crate::rules::{flake8_bandit, flake8_pyi, ruff}; /// Run lint rules over a [`StringLike`] syntax nodes. pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) { + if checker.any_enabled(&[ + Rule::AmbiguousUnicodeCharacterString, + Rule::AmbiguousUnicodeCharacterDocstring, + ]) { + ruff::rules::ambiguous_unicode_character_string(checker, string_like); + } if checker.enabled(Rule::HardcodedBindAllInterfaces) { flake8_bandit::rules::hardcoded_bind_all_interfaces(checker, string_like); } diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 27662f02e6d73..c676645815669 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -6,17 +6,14 @@ use ruff_notebook::CellOffsets; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; use ruff_diagnostics::Diagnostic; use ruff_python_index::Indexer; use ruff_source_file::Locator; use crate::directives::TodoComment; -use crate::lex::docstring_detection::StateMachine; use crate::registry::{AsRule, Rule}; use crate::rules::pycodestyle::rules::BlankLinesChecker; -use crate::rules::ruff::rules::Context; use crate::rules::{ eradicate, flake8_commas, flake8_executable, flake8_fixme, flake8_implicit_str_concat, flake8_pyi, flake8_quotes, flake8_todos, pycodestyle, pygrep_hooks, pylint, pyupgrade, ruff, @@ -66,31 +63,15 @@ pub(crate) fn check_tokens( pylint::rules::empty_comments(&mut diagnostics, indexer, locator); } - if settings.rules.any_enabled(&[ - Rule::AmbiguousUnicodeCharacterString, - Rule::AmbiguousUnicodeCharacterDocstring, - Rule::AmbiguousUnicodeCharacterComment, - ]) { - let mut state_machine = StateMachine::default(); - for &(ref tok, range) in tokens.iter().flatten() { - let is_docstring = state_machine.consume(tok); - let context = match tok { - Tok::String { .. } => { - if is_docstring { - Context::Docstring - } else { - Context::String - } - } - Tok::FStringMiddle { .. } => Context::String, - Tok::Comment(_) => Context::Comment, - _ => continue, - }; - ruff::rules::ambiguous_unicode_character( + if settings + .rules + .enabled(Rule::AmbiguousUnicodeCharacterComment) + { + for range in indexer.comment_ranges() { + ruff::rules::ambiguous_unicode_character_comment( &mut diagnostics, locator, - range, - context, + *range, settings, ); } diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs index 1b59f90419bd3..e85f14d7116af 100644 --- a/crates/ruff_linter/src/registry.rs +++ b/crates/ruff_linter/src/registry.rs @@ -256,8 +256,6 @@ impl Rule { | Rule::MixedSpacesAndTabs | Rule::TrailingWhitespace => LintSource::PhysicalLines, Rule::AmbiguousUnicodeCharacterComment - | Rule::AmbiguousUnicodeCharacterDocstring - | Rule::AmbiguousUnicodeCharacterString | Rule::AvoidableEscapedQuote | Rule::BadQuotesDocstring | Rule::BadQuotesInlineString diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index 9f8e52fa0b829..b3600d62fd3e2 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -4,9 +4,11 @@ use bitflags::bitflags; use ruff_diagnostics::{Diagnostic, DiagnosticKind, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::StringLike; use ruff_source_file::Locator; -use ruff_text_size::{TextLen, TextRange, TextSize}; +use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; +use crate::checkers::ast::Checker; use crate::registry::AsRule; use crate::rules::ruff::rules::confusables::confusable; use crate::rules::ruff::rules::Context; @@ -171,16 +173,59 @@ impl Violation for AmbiguousUnicodeCharacterComment { } } -/// RUF001, RUF002, RUF003 -pub(crate) fn ambiguous_unicode_character( +/// RUF003 +pub(crate) fn ambiguous_unicode_character_comment( diagnostics: &mut Vec, locator: &Locator, range: TextRange, - context: Context, settings: &LinterSettings, ) { let text = locator.slice(range); + ambiguous_unicode_character(diagnostics, text, range, Context::Comment, settings); +} +/// RUF001, RUF002 +pub(crate) fn ambiguous_unicode_character_string(checker: &mut Checker, string_like: StringLike) { + let context = if checker.semantic().in_docstring() { + Context::Docstring + } else { + Context::String + }; + + match string_like { + StringLike::StringLiteral(string_literal) => { + for string in &string_literal.value { + let text = checker.locator().slice(string); + ambiguous_unicode_character( + &mut checker.diagnostics, + text, + string.range(), + context, + checker.settings, + ); + } + } + StringLike::FStringLiteral(f_string_literal) => { + let text = checker.locator().slice(f_string_literal); + ambiguous_unicode_character( + &mut checker.diagnostics, + text, + f_string_literal.range(), + context, + checker.settings, + ); + } + StringLike::BytesLiteral(_) => (), + } +} + +fn ambiguous_unicode_character( + diagnostics: &mut Vec, + text: &str, + range: TextRange, + context: Context, + settings: &LinterSettings, +) { // Most of the time, we don't need to check for ambiguous unicode characters at all. if text.is_ascii() { return;