From 3e15b26dc4df1b101705b34bab74465d764477b7 Mon Sep 17 00:00:00 2001 From: Mike Bernard Date: Sun, 17 Dec 2023 18:42:17 -0800 Subject: [PATCH 1/6] [pylint] add PLR2044 empty_comment --- Cargo.lock | 1 + .../test/fixtures/pylint/empty_comment.py | 40 +++ .../src/checkers/physical_lines.rs | 21 ++ crates/ruff_linter/src/codes.rs | 1 + crates/ruff_linter/src/registry.rs | 1 + crates/ruff_linter/src/rules/pylint/mod.rs | 1 + .../src/rules/pylint/rules/empty_comment.rs | 93 +++++++ .../ruff_linter/src/rules/pylint/rules/mod.rs | 2 + ...lint__tests__PLR2044_empty_comment.py.snap | 84 ++++++ crates/ruff_python_trivia/Cargo.toml | 1 + .../ruff_python_trivia/src/comment_ranges.rs | 251 +++++++++++++++++- ruff.schema.json | 2 + 12 files changed, 497 insertions(+), 1 deletion(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py create mode 100644 crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs create mode 100644 crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap diff --git a/Cargo.lock b/Cargo.lock index be9c99c71e9fc..6ec9c3b2ac557 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2454,6 +2454,7 @@ dependencies = [ "insta", "itertools 0.11.0", "ruff_python_ast", + "ruff_python_index", "ruff_python_parser", "ruff_source_file", "ruff_text_size", diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py new file mode 100644 index 0000000000000..2488732a8168f --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py @@ -0,0 +1,40 @@ +# this line has a non-empty comment and is OK + # this line is also OK, but the three following lines are not +# + # + # + +# this non-empty comment has trailing whitespace and is OK + +# Many codebases use multiple `#` characters on a single line to visually +# separate sections of code, so we don't consider these empty comments. + +########################################## + +# trailing `#` characters are not considered empty comments ### + + +def foo(): # this comment is OK, the one below is not + pass # + + +# the lines below have no comments and are OK +def bar(): + pass + + +# "Empty comments" are common in block comments +# to add legibility. For example: +# +# The above line's "empty comment" is likely +# intentional and is considered OK. + + +# lines in multi-line strings/comments whose last non-whitespace character is a `#` +# do not count as empty comments +""" +The following lines are all fine: +# + # + # +""" diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index a2b8b98b85c34..2025e1e32be52 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -1,4 +1,7 @@ //! Lint rules based on checking physical lines. + +use std::collections::HashSet; + use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; @@ -32,9 +35,18 @@ pub(crate) fn check_physical_lines( let enforce_blank_line_contains_whitespace = settings.rules.enabled(Rule::BlankLineWithWhitespace); let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice); + let enforce_empty_comment = settings.rules.enabled(Rule::EmptyComment); let mut doc_lines_iter = doc_lines.iter().peekable(); + let block_comment_line_offsets: HashSet = indexer + .comment_ranges() + .block_comments(locator) + .into_iter() + .flatten() + .map(|o| locator.line_start(o)) + .collect(); + for line in locator.contents().universal_newlines() { while doc_lines_iter .next_if(|doc_line_start| line.range().contains_inclusive(**doc_line_start)) @@ -68,6 +80,15 @@ pub(crate) fn check_physical_lines( diagnostics.push(diagnostic); } } + + if enforce_empty_comment + && !block_comment_line_offsets.contains(&line.start()) + && indexer.comment_ranges().intersects(line.range()) + { + if let Some(diagnostic) = pylint::rules::empty_comment(&line) { + diagnostics.push(diagnostic); + } + } } if enforce_no_newline_at_end_of_file { diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 550960ce275af..df4cbb196cbf6 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -265,6 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "R1733") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryDictIndexLookup), (Pylint, "R1736") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryListIndexLookup), (Pylint, "R2004") => (RuleGroup::Stable, rules::pylint::rules::MagicValueComparison), + (Pylint, "R2044") => (RuleGroup::Stable, rules::pylint::rules::EmptyComment), (Pylint, "R5501") => (RuleGroup::Stable, rules::pylint::rules::CollapsibleElseIf), (Pylint, "R6201") => (RuleGroup::Preview, rules::pylint::rules::LiteralMembership), #[allow(deprecated)] diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs index 2e6fcc199580e..3fba58247ab05 100644 --- a/crates/ruff_linter/src/registry.rs +++ b/crates/ruff_linter/src/registry.rs @@ -250,6 +250,7 @@ impl Rule { Rule::BidirectionalUnicode | Rule::BlankLineWithWhitespace | Rule::DocLineTooLong + | Rule::EmptyComment | Rule::LineTooLong | Rule::MissingCopyrightNotice | Rule::MissingNewlineAtEndOfFile diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs index 303395de50406..02161bad780b7 100644 --- a/crates/ruff_linter/src/rules/pylint/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/mod.rs @@ -40,6 +40,7 @@ mod tests { )] #[test_case(Rule::ComparisonWithItself, Path::new("comparison_with_itself.py"))] #[test_case(Rule::EqWithoutHash, Path::new("eq_without_hash.py"))] + #[test_case(Rule::EmptyComment, Path::new("empty_comment.py"))] #[test_case(Rule::ManualFromImport, Path::new("import_aliasing.py"))] #[test_case(Rule::SingleStringSlots, Path::new("single_string_slots.py"))] #[test_case(Rule::SysExitAlias, Path::new("sys_exit_alias_0.py"))] diff --git a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs new file mode 100644 index 0000000000000..e51afe26dd71d --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs @@ -0,0 +1,93 @@ +use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_trivia::is_python_whitespace; +use ruff_source_file::newlines::Line; +use ruff_text_size::{TextRange, TextSize}; + +/// ## What it does +/// Checks for a # symbol appearing on a line not followed by an actual comment. +/// +/// ## Why is this bad? +/// Empty comments don't provide any clarity to the code, and just add clutter. +/// Either add a comment or delete the empty comment. +/// +/// ## Example +/// ```python +/// class Foo: # +/// pass +/// ``` +/// +/// Use instead: +/// ```python +/// class Foo: +/// pass +/// ``` +/// +/// ## References +/// - [Pylint documentation](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/empty-comment.html) +#[violation] +pub struct EmptyComment; + +impl Violation for EmptyComment { + const FIX_AVAILABILITY: FixAvailability = FixAvailability::Always; + + #[derive_message_formats] + fn message(&self) -> String { + format!("Line with empty comment") + } + + fn fix_title(&self) -> Option { + Some(format!("Delete the empty comment")) + } +} + +/// PLR2044 +pub(crate) fn empty_comment(line: &Line) -> Option { + let first_hash_col = u32::try_from( + line.as_str() + .char_indices() + .find(|&(_, char)| char == '#')? // indicates the line does not contain a comment + .0, + ) + .unwrap(); // SAFETY: already know line is valid and all TextSize indices are u32 + + let last_non_whitespace_col = u32::try_from( + line.as_str() + .char_indices() + .rev() + .find(|&(_, char)| !is_python_whitespace(char)) + .unwrap() // SAFETY: already verified at least one '#' char is in the line + .0, + ) + .unwrap(); // SAFETY: already know line is valid and all TextSize indices are u32 + + if last_non_whitespace_col != first_hash_col { + return None; // the comment is not empty + } + + let deletion_start_col = match line + .as_str() + .char_indices() + .rev() + .find(|&(_, c)| !is_python_whitespace(c) && c != '#') + { + Some((last_non_whitespace_non_comment_col, _)) => { + // SAFETY: (last_non_whitespace_non_comment_col + 1) <= u32::MAX because last_non_whitespace_col <= u32::MAX + // and last_non_whitespace_non_comment_col < last_non_whitespace_col + line.start() + + TextSize::new(u32::try_from(last_non_whitespace_non_comment_col + 1).unwrap()) + } + None => line.start(), + }; + + Some( + Diagnostic::new( + EmptyComment, + TextRange::new(line.start() + TextSize::new(first_hash_col), line.end()), + ) + .with_fix(Fix::safe_edit(Edit::deletion( + deletion_start_col, + line.end(), + ))), + ) +} diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs index 85699ab30e1ed..28c0a5c980794 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/mod.rs @@ -14,6 +14,7 @@ pub(crate) use comparison_of_constant::*; pub(crate) use comparison_with_itself::*; pub(crate) use continue_in_finally::*; pub(crate) use duplicate_bases::*; +pub(crate) use empty_comment::*; pub(crate) use eq_without_hash::*; pub(crate) use global_at_module_level::*; pub(crate) use global_statement::*; @@ -92,6 +93,7 @@ mod comparison_of_constant; mod comparison_with_itself; mod continue_in_finally; mod duplicate_bases; +mod empty_comment; mod eq_without_hash; mod global_at_module_level; mod global_statement; diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap new file mode 100644 index 0000000000000..1472d0412901e --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap @@ -0,0 +1,84 @@ +--- +source: crates/ruff_linter/src/rules/pylint/mod.rs +--- +empty_comment.py:3:1: PLR2044 [*] Line with empty comment + | +1 | # this line has a non-empty comment and is OK +2 | # this line is also OK, but the three following lines are not +3 | # + | ^ PLR2044 +4 | # +5 | # + | + = help: Delete the empty comment + +ℹ Safe fix +1 1 | # this line has a non-empty comment and is OK +2 2 | # this line is also OK, but the three following lines are not +3 |-# + 3 |+ +4 4 | # +5 5 | # +6 6 | + +empty_comment.py:4:5: PLR2044 [*] Line with empty comment + | +2 | # this line is also OK, but the three following lines are not +3 | # +4 | # + | ^ PLR2044 +5 | # + | + = help: Delete the empty comment + +ℹ Safe fix +1 1 | # this line has a non-empty comment and is OK +2 2 | # this line is also OK, but the three following lines are not +3 3 | # +4 |- # + 4 |+ +5 5 | # +6 6 | +7 7 | # this non-empty comment has trailing whitespace and is OK + +empty_comment.py:5:9: PLR2044 [*] Line with empty comment + | +3 | # +4 | # +5 | # + | ^^^^^ PLR2044 +6 | +7 | # this non-empty comment has trailing whitespace and is OK + | + = help: Delete the empty comment + +ℹ Safe fix +2 2 | # this line is also OK, but the three following lines are not +3 3 | # +4 4 | # +5 |- # +6 5 | + 6 |+ +7 7 | # this non-empty comment has trailing whitespace and is OK +8 8 | +9 9 | # Many codebases use multiple `#` characters on a single line to visually + +empty_comment.py:18:11: PLR2044 [*] Line with empty comment + | +17 | def foo(): # this comment is OK, the one below is not +18 | pass # + | ^^^^^ PLR2044 + | + = help: Delete the empty comment + +ℹ Safe fix +15 15 | +16 16 | +17 17 | def foo(): # this comment is OK, the one below is not +18 |- pass # + 18 |+ pass +19 19 | +20 20 | +21 21 | # the lines below have no comments and are OK + + diff --git a/crates/ruff_python_trivia/Cargo.toml b/crates/ruff_python_trivia/Cargo.toml index e17aafc44b3b5..fd5fb1f6dcf57 100644 --- a/crates/ruff_python_trivia/Cargo.toml +++ b/crates/ruff_python_trivia/Cargo.toml @@ -23,6 +23,7 @@ unicode-ident = { workspace = true } insta = { workspace = true } ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_parser = { path = "../ruff_python_parser" } +ruff_python_index = { path = "../ruff_python_index" } [lints] workspace = true diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index a5b7b7ed50ddb..3e923ad04a205 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -2,8 +2,11 @@ use std::fmt::{Debug, Formatter}; use std::ops::Deref; use itertools::Itertools; +use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange}; +use ruff_text_size::{Ranged, TextRange, TextSize}; + +use crate::is_python_whitespace; /// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping. #[derive(Clone, Default)] @@ -45,6 +48,98 @@ impl CommentRanges { None => &self.raw[start..], } } + + /// Given a `CommentRanges`, determine which comments are grouped together + /// in block comments. Block comments are defined as a sequence of consecutive + /// lines which only contain comments and which all contain the first + /// `#` character in the same column. + /// + /// Returns a vector of vectors, with each representing a block comment, and + /// the values of each being the offset of the leading `#` character of the comment. + /// + /// Example: + /// ```python + /// # This is a block comment + /// # because it spans multiple lines + /// + /// # This is also a block comment + /// # even though it is indented + /// + /// # this is not a block comment + /// + /// x = 1 # this is not a block comment because + /// y = 2 # the lines do not *only* contain comments + /// + /// # This is not a block comment because + /// # not all consecutive lines have the + /// # first `#` character in the same column + /// + /// """ + /// # This is not a block comment because it is + /// # contained within a multi-line string/comment + /// """ + /// ``` + pub fn block_comments(&self, locator: &Locator) -> Vec> { + let mut block_comments: Vec> = Vec::new(); + + let mut current_block: Vec = Vec::new(); + let mut current_block_column: Option = None; + let mut current_block_offset: Option = None; + + for comment_range in &self.raw { + let offset = comment_range.start(); + let line_start = locator.line_start(offset); + + if CommentRanges::any_code_before_comment(locator, line_start) { + if current_block.len() > 1 { + block_comments.push(current_block); + current_block = vec![]; + current_block_column = None; + current_block_offset = None; + } + continue; + } + + let line_end = locator.full_line_end(offset).to_u32(); + let column = (offset - line_start).to_u32(); + + if let Some(c) = current_block_column { + if let Some(o) = current_block_offset { + if column == c && line_start.to_u32() == o { + current_block.push(offset); + current_block_offset = Some(line_end); + } else { + if current_block.len() > 1 { + block_comments.push(current_block); + } + current_block = vec![offset]; + current_block_column = Some(column); + current_block_offset = Some(line_end); + } + } + } else { + current_block = vec![offset]; + current_block_column = Some(column); + current_block_offset = Some(line_end); + } + } + if current_block.len() > 1 { + block_comments.push(current_block); + } + block_comments + } + + fn any_code_before_comment(locator: &Locator, offset_line_start: TextSize) -> bool { + let contents = locator.full_line(offset_line_start); + for char in contents.chars() { + if char == '#' || char == '\r' || char == '\n' { + return false; + } else if !is_python_whitespace(char) { + return true; + } + } + unreachable!("The above loop should always return with a valid Locator"); + } } impl Deref for CommentRanges { @@ -69,3 +164,157 @@ impl<'a> IntoIterator for &'a CommentRanges { self.raw.iter() } } + +#[cfg(test)] +mod tests { + use ruff_python_index::Indexer; + use ruff_python_parser::lexer::LexResult; + use ruff_python_parser::{tokenize, Mode}; + use ruff_source_file::Locator; + use ruff_text_size::TextSize; + + #[test] + fn block_comments_two_line_block_at_start() { + // arrange + let source = "# line 1\n# line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![vec![TextSize::new(0), TextSize::new(9)]] + ); + } + + #[test] + fn block_comments_indented_block() { + // arrange + let source = " # line 1\n # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![vec![TextSize::new(4), TextSize::new(17)]] + ); + } + + #[test] + fn block_comments_single_line_is_not_a_block() { + // arrange + let source = "\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_lines_with_code_not_a_block() { + // arrange + let source = "x = 1 # line 1\ny = 2 # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_sequential_lines_not_in_block() { + // arrange + let source = " # line 1\n # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_lines_in_triple_quotes_not_a_block() { + // arrange + let source = r#" + """ + # line 1 + # line 2 + """ + "#; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_stress_test() { + // arrange + let source = r#" +# block comment 1 line 1 +# block comment 2 line 2 + +# these lines + # do not form +# a block comment + +x = 1 # these lines also do not +y = 2 # do not form a block comment + +# these lines do form a block comment +# + + # + # and so do these + # + +""" +# these lines are in triple quotes and +# therefore do not form a block comment +""" + "#; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![ + vec![TextSize::new(1), TextSize::new(26)], + vec![TextSize::new(174), TextSize::new(212)], + vec![TextSize::new(219), TextSize::new(225), TextSize::new(247)] + ] + ); + } +} diff --git a/ruff.schema.json b/ruff.schema.json index 3761d705180f7..46f6e8fcd9f1f 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3200,6 +3200,8 @@ "PLR20", "PLR200", "PLR2004", + "PLR204", + "PLR2044", "PLR5", "PLR55", "PLR550", From 19d9f3bc18238df74c3c2a98cdb78221e5853fea Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 28 Dec 2023 11:40:24 -0500 Subject: [PATCH 2/6] Move to preview --- crates/ruff_linter/src/codes.rs | 2 +- crates/ruff_python_trivia/src/comment_ranges.rs | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 9c0d897a44acf..321b485e85e0d 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -265,7 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "R1733") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryDictIndexLookup), (Pylint, "R1736") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryListIndexLookup), (Pylint, "R2004") => (RuleGroup::Stable, rules::pylint::rules::MagicValueComparison), - (Pylint, "R2044") => (RuleGroup::Stable, rules::pylint::rules::EmptyComment), + (Pylint, "R2044") => (RuleGroup::Preview, rules::pylint::rules::EmptyComment), (Pylint, "R5501") => (RuleGroup::Stable, rules::pylint::rules::CollapsibleElseIf), (Pylint, "R6201") => (RuleGroup::Preview, rules::pylint::rules::LiteralMembership), #[allow(deprecated)] diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index 3e923ad04a205..09e1e9edbc6a6 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -90,7 +90,7 @@ impl CommentRanges { let offset = comment_range.start(); let line_start = locator.line_start(offset); - if CommentRanges::any_code_before_comment(locator, line_start) { + if Self::is_end_of_line(locator, line_start) { if current_block.len() > 1 { block_comments.push(current_block); current_block = vec![]; @@ -129,7 +129,8 @@ impl CommentRanges { block_comments } - fn any_code_before_comment(locator: &Locator, offset_line_start: TextSize) -> bool { + /// Returns `true` if a comment is an end-of-line comment (as opposed to an own-line comment). + fn is_end_of_line(locator: &Locator, offset_line_start: TextSize) -> bool { let contents = locator.full_line(offset_line_start); for char in contents.chars() { if char == '#' || char == '\r' || char == '\n' { @@ -138,7 +139,7 @@ impl CommentRanges { return true; } } - unreachable!("The above loop should always return with a valid Locator"); + false } } From 72223bc2e9cf0718475ecfd32649b84be92b5edd Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 28 Dec 2023 21:13:51 -0500 Subject: [PATCH 3/6] Move to token-based rule; delete entire line --- .../src/checkers/physical_lines.rs | 20 ---- crates/ruff_linter/src/checkers/tokens.rs | 4 + crates/ruff_linter/src/registry.rs | 2 +- .../src/rules/pylint/rules/empty_comment.rs | 91 +++++++++++-------- ...lint__tests__PLR2044_empty_comment.py.snap | 20 ++-- .../ruff_python_trivia/src/comment_ranges.rs | 88 +++++++++--------- 6 files changed, 107 insertions(+), 118 deletions(-) diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index 2025e1e32be52..fbb9abff633e3 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -1,7 +1,5 @@ //! Lint rules based on checking physical lines. -use std::collections::HashSet; - use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; @@ -35,18 +33,9 @@ pub(crate) fn check_physical_lines( let enforce_blank_line_contains_whitespace = settings.rules.enabled(Rule::BlankLineWithWhitespace); let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice); - let enforce_empty_comment = settings.rules.enabled(Rule::EmptyComment); let mut doc_lines_iter = doc_lines.iter().peekable(); - let block_comment_line_offsets: HashSet = indexer - .comment_ranges() - .block_comments(locator) - .into_iter() - .flatten() - .map(|o| locator.line_start(o)) - .collect(); - for line in locator.contents().universal_newlines() { while doc_lines_iter .next_if(|doc_line_start| line.range().contains_inclusive(**doc_line_start)) @@ -80,15 +69,6 @@ pub(crate) fn check_physical_lines( diagnostics.push(diagnostic); } } - - if enforce_empty_comment - && !block_comment_line_offsets.contains(&line.start()) - && indexer.comment_ranges().intersects(line.range()) - { - if let Some(diagnostic) = pylint::rules::empty_comment(&line) { - diagnostics.push(diagnostic); - } - } } if enforce_no_newline_at_end_of_file { diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index ae7643d92f7e1..c67c21ba4d326 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -40,6 +40,10 @@ pub(crate) fn check_tokens( pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, indexer, locator); } + if settings.rules.enabled(Rule::EmptyComment) { + pylint::rules::empty_comments(&mut diagnostics, indexer, locator); + } + if settings.rules.any_enabled(&[ Rule::AmbiguousUnicodeCharacterString, Rule::AmbiguousUnicodeCharacterDocstring, diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs index 3fba58247ab05..21499e9608492 100644 --- a/crates/ruff_linter/src/registry.rs +++ b/crates/ruff_linter/src/registry.rs @@ -250,7 +250,6 @@ impl Rule { Rule::BidirectionalUnicode | Rule::BlankLineWithWhitespace | Rule::DocLineTooLong - | Rule::EmptyComment | Rule::LineTooLong | Rule::MissingCopyrightNotice | Rule::MissingNewlineAtEndOfFile @@ -266,6 +265,7 @@ impl Rule { | Rule::BlanketNOQA | Rule::BlanketTypeIgnore | Rule::CommentedOutCode + | Rule::EmptyComment | Rule::ExtraneousParentheses | Rule::InvalidCharacterBackspace | Rule::InvalidCharacterEsc diff --git a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs index e51afe26dd71d..1581b6de1ef31 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs @@ -1,8 +1,11 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_index::Indexer; use ruff_python_trivia::is_python_whitespace; -use ruff_source_file::newlines::Line; + +use ruff_source_file::Locator; use ruff_text_size::{TextRange, TextSize}; +use rustc_hash::FxHashSet; /// ## What it does /// Checks for a # symbol appearing on a line not followed by an actual comment. @@ -42,52 +45,62 @@ impl Violation for EmptyComment { } /// PLR2044 -pub(crate) fn empty_comment(line: &Line) -> Option { - let first_hash_col = u32::try_from( - line.as_str() - .char_indices() - .find(|&(_, char)| char == '#')? // indicates the line does not contain a comment - .0, - ) - .unwrap(); // SAFETY: already know line is valid and all TextSize indices are u32 +pub(crate) fn empty_comments( + diagnostics: &mut Vec, + indexer: &Indexer, + locator: &Locator, +) { + let block_comments = FxHashSet::from_iter(indexer.comment_ranges().block_comments(locator)); - let last_non_whitespace_col = u32::try_from( - line.as_str() - .char_indices() - .rev() - .find(|&(_, char)| !is_python_whitespace(char)) - .unwrap() // SAFETY: already verified at least one '#' char is in the line - .0, - ) - .unwrap(); // SAFETY: already know line is valid and all TextSize indices are u32 + for range in indexer.comment_ranges() { + // Ignore comments that are part of multi-line "comment blocks". + if block_comments.contains(&range.start()) { + continue; + } - if last_non_whitespace_col != first_hash_col { - return None; // the comment is not empty + // If the line contains an empty comment, add a diagnostic. + if let Some(diagnostic) = empty_comment(*range, locator) { + diagnostics.push(diagnostic); + } } +} - let deletion_start_col = match line - .as_str() +/// Return a [`Diagnostic`] if the comment at the given [`TextRange`] is empty. +fn empty_comment(range: TextRange, locator: &Locator) -> Option { + // Check: is the comment empty? + if !locator + .slice(range) + .chars() + .skip(1) + .all(is_python_whitespace) + { + return None; + } + + // Find the location of the `#`. + let first_hash_col = range.start(); + + // Find the start of the line. + let line = locator.line_range(first_hash_col); + + // Find the last character in the line that precedes the comment, if any. + let deletion_start_col = locator + .slice(TextRange::new(line.start(), first_hash_col)) .char_indices() .rev() .find(|&(_, c)| !is_python_whitespace(c) && c != '#') - { - Some((last_non_whitespace_non_comment_col, _)) => { - // SAFETY: (last_non_whitespace_non_comment_col + 1) <= u32::MAX because last_non_whitespace_col <= u32::MAX - // and last_non_whitespace_non_comment_col < last_non_whitespace_col - line.start() - + TextSize::new(u32::try_from(last_non_whitespace_non_comment_col + 1).unwrap()) - } - None => line.start(), - }; + .map(|(last_non_whitespace_non_comment_col, _)| { + // SAFETY: (last_non_whitespace_non_comment_col + 1) <= first_hash_col + TextSize::try_from(last_non_whitespace_non_comment_col).unwrap() + TextSize::new(1) + }); Some( - Diagnostic::new( - EmptyComment, - TextRange::new(line.start() + TextSize::new(first_hash_col), line.end()), - ) - .with_fix(Fix::safe_edit(Edit::deletion( - deletion_start_col, - line.end(), - ))), + Diagnostic::new(EmptyComment, TextRange::new(first_hash_col, line.end())).with_fix( + Fix::safe_edit(if let Some(deletion_start_col) = deletion_start_col { + Edit::deletion(line.start() + deletion_start_col, line.end()) + } else { + Edit::range_deletion(locator.full_line_range(first_hash_col)) + }), + ), ) } diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap index 1472d0412901e..8de31a60003bf 100644 --- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap @@ -16,10 +16,9 @@ empty_comment.py:3:1: PLR2044 [*] Line with empty comment 1 1 | # this line has a non-empty comment and is OK 2 2 | # this line is also OK, but the three following lines are not 3 |-# - 3 |+ -4 4 | # -5 5 | # -6 6 | +4 3 | # +5 4 | # +6 5 | empty_comment.py:4:5: PLR2044 [*] Line with empty comment | @@ -36,10 +35,9 @@ empty_comment.py:4:5: PLR2044 [*] Line with empty comment 2 2 | # this line is also OK, but the three following lines are not 3 3 | # 4 |- # - 4 |+ -5 5 | # -6 6 | -7 7 | # this non-empty comment has trailing whitespace and is OK +5 4 | # +6 5 | +7 6 | # this non-empty comment has trailing whitespace and is OK empty_comment.py:5:9: PLR2044 [*] Line with empty comment | @@ -58,10 +56,8 @@ empty_comment.py:5:9: PLR2044 [*] Line with empty comment 4 4 | # 5 |- # 6 5 | - 6 |+ -7 7 | # this non-empty comment has trailing whitespace and is OK -8 8 | -9 9 | # Many codebases use multiple `#` characters on a single line to visually +7 6 | # this non-empty comment has trailing whitespace and is OK +8 7 | empty_comment.py:18:11: PLR2044 [*] Line with empty comment | diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index 09e1e9edbc6a6..664ca7c965da3 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -49,15 +49,15 @@ impl CommentRanges { } } - /// Given a `CommentRanges`, determine which comments are grouped together - /// in block comments. Block comments are defined as a sequence of consecutive - /// lines which only contain comments and which all contain the first - /// `#` character in the same column. + /// Given a [`CommentRanges`], determine which comments are grouped together + /// in "comment blocks". A "comment block" is a sequence of consecutive + /// own-line comments in which the comment hash (`#`) appears in the same + /// column in each line. /// - /// Returns a vector of vectors, with each representing a block comment, and - /// the values of each being the offset of the leading `#` character of the comment. + /// Returns a vector containing the offset of the leading hash (`#`) for + /// each comment in any block comment. /// - /// Example: + /// ## Examples /// ```python /// # This is a block comment /// # because it spans multiple lines @@ -79,20 +79,20 @@ impl CommentRanges { /// # contained within a multi-line string/comment /// """ /// ``` - pub fn block_comments(&self, locator: &Locator) -> Vec> { - let mut block_comments: Vec> = Vec::new(); + pub fn block_comments(&self, locator: &Locator) -> Vec { + let mut block_comments: Vec = Vec::new(); let mut current_block: Vec = Vec::new(); - let mut current_block_column: Option = None; - let mut current_block_offset: Option = None; + let mut current_block_column: Option = None; + let mut current_block_offset: Option = None; for comment_range in &self.raw { let offset = comment_range.start(); let line_start = locator.line_start(offset); - if Self::is_end_of_line(locator, line_start) { + if !Self::is_own_line(locator, offset) { if current_block.len() > 1 { - block_comments.push(current_block); + block_comments.extend(current_block); current_block = vec![]; current_block_column = None; current_block_offset = None; @@ -100,17 +100,17 @@ impl CommentRanges { continue; } - let line_end = locator.full_line_end(offset).to_u32(); - let column = (offset - line_start).to_u32(); + let line_end = locator.full_line_end(offset); + let column = offset - line_start; - if let Some(c) = current_block_column { - if let Some(o) = current_block_offset { - if column == c && line_start.to_u32() == o { + if let Some(current_column) = current_block_column { + if let Some(current_offset) = current_block_offset { + if column == current_column && line_start == current_offset { current_block.push(offset); current_block_offset = Some(line_end); } else { if current_block.len() > 1 { - block_comments.push(current_block); + block_comments.extend(current_block); } current_block = vec![offset]; current_block_column = Some(column); @@ -123,23 +123,18 @@ impl CommentRanges { current_block_offset = Some(line_end); } } + if current_block.len() > 1 { - block_comments.push(current_block); + block_comments.extend(current_block); } + block_comments } - /// Returns `true` if a comment is an end-of-line comment (as opposed to an own-line comment). - fn is_end_of_line(locator: &Locator, offset_line_start: TextSize) -> bool { - let contents = locator.full_line(offset_line_start); - for char in contents.chars() { - if char == '#' || char == '\r' || char == '\n' { - return false; - } else if !is_python_whitespace(char) { - return true; - } - } - false + /// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment). + fn is_own_line(locator: &Locator, offset: TextSize) -> bool { + let range = TextRange::new(locator.line_start(offset), offset); + locator.slice(range).chars().all(is_python_whitespace) } } @@ -186,10 +181,7 @@ mod tests { let block_comments = indexer.comment_ranges().block_comments(&locator); // assert - assert_eq!( - block_comments, - vec![vec![TextSize::new(0), TextSize::new(9)]] - ); + assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]); } #[test] @@ -204,10 +196,7 @@ mod tests { let block_comments = indexer.comment_ranges().block_comments(&locator); // assert - assert_eq!( - block_comments, - vec![vec![TextSize::new(4), TextSize::new(17)]] - ); + assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]); } #[test] @@ -222,7 +211,7 @@ mod tests { let block_comments = indexer.comment_ranges().block_comments(&locator); // assert - assert_eq!(block_comments, Vec::>::new()); + assert_eq!(block_comments, Vec::::new()); } #[test] @@ -237,7 +226,7 @@ mod tests { let block_comments = indexer.comment_ranges().block_comments(&locator); // assert - assert_eq!(block_comments, Vec::>::new()); + assert_eq!(block_comments, Vec::::new()); } #[test] @@ -252,7 +241,7 @@ mod tests { let block_comments = indexer.comment_ranges().block_comments(&locator); // assert - assert_eq!(block_comments, Vec::>::new()); + assert_eq!(block_comments, Vec::::new()); } #[test] @@ -272,7 +261,7 @@ mod tests { let block_comments = indexer.comment_ranges().block_comments(&locator); // assert - assert_eq!(block_comments, Vec::>::new()); + assert_eq!(block_comments, Vec::::new()); } #[test] @@ -312,9 +301,16 @@ y = 2 # do not form a block comment assert_eq!( block_comments, vec![ - vec![TextSize::new(1), TextSize::new(26)], - vec![TextSize::new(174), TextSize::new(212)], - vec![TextSize::new(219), TextSize::new(225), TextSize::new(247)] + // Block #1 + TextSize::new(1), + TextSize::new(26), + // Block #2 + TextSize::new(174), + TextSize::new(212), + // Block #3 + TextSize::new(219), + TextSize::new(225), + TextSize::new(247) ] ); } From 32119ddf2606620a6c0e6008ce7bb17949e099dc Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 28 Dec 2023 21:28:08 -0500 Subject: [PATCH 4/6] Add empty check --- .../test/fixtures/pylint/empty_comment.py | 7 ++++ .../ruff_python_trivia/src/comment_ranges.rs | 37 +++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py index 2488732a8168f..4d6f5e2d4687f 100644 --- a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py +++ b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py @@ -38,3 +38,10 @@ def bar(): # # """ + +# These should be removed, despite being an empty "block comment". + +x = 1 + +# +# diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index 664ca7c965da3..c53466ede9e7a 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -85,18 +85,21 @@ impl CommentRanges { let mut current_block: Vec = Vec::new(); let mut current_block_column: Option = None; let mut current_block_offset: Option = None; + let mut current_block_non_empty = false; for comment_range in &self.raw { let offset = comment_range.start(); let line_start = locator.line_start(offset); - if !Self::is_own_line(locator, offset) { - if current_block.len() > 1 { + if !Self::is_own_line(offset, locator) { + // Push the current block, and reset. + if current_block.len() > 1 && current_block_non_empty { block_comments.extend(current_block); - current_block = vec![]; - current_block_column = None; - current_block_offset = None; } + current_block = vec![]; + current_block_column = None; + current_block_offset = None; + current_block_non_empty = false; continue; } @@ -106,33 +109,51 @@ impl CommentRanges { if let Some(current_column) = current_block_column { if let Some(current_offset) = current_block_offset { if column == current_column && line_start == current_offset { + // Add the comment to the current block. current_block.push(offset); current_block_offset = Some(line_end); + current_block_non_empty |= !Self::is_empty(*comment_range, locator); } else { - if current_block.len() > 1 { + // Push the current block, and reset. + if current_block.len() > 1 && current_block_non_empty { block_comments.extend(current_block); } current_block = vec![offset]; current_block_column = Some(column); current_block_offset = Some(line_end); + current_block_non_empty |= !Self::is_empty(*comment_range, locator); } } } else { + // Push the current block, and reset. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } current_block = vec![offset]; current_block_column = Some(column); current_block_offset = Some(line_end); + current_block_non_empty |= !Self::is_empty(*comment_range, locator); } } - if current_block.len() > 1 { + if current_block.len() > 1 && current_block_non_empty { block_comments.extend(current_block); } block_comments } + /// Returns `true` if the given range is an empty comment. + fn is_empty(range: TextRange, locator: &Locator) -> bool { + locator + .slice(range) + .chars() + .skip(1) + .all(is_python_whitespace) + } + /// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment). - fn is_own_line(locator: &Locator, offset: TextSize) -> bool { + fn is_own_line(offset: TextSize, locator: &Locator) -> bool { let range = TextRange::new(locator.line_start(offset), offset); locator.slice(range).chars().all(is_python_whitespace) } From 20e8827dd16279774a034c9a9bc5af03fdabf2f4 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 28 Dec 2023 21:33:10 -0500 Subject: [PATCH 5/6] Handle blank lines --- ...lint__tests__PLR2044_empty_comment.py.snap | 30 +++++++++++++++++++ .../ruff_python_trivia/src/comment_ranges.rs | 26 ++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap index 8de31a60003bf..3c9f54e6abee9 100644 --- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap @@ -77,4 +77,34 @@ empty_comment.py:18:11: PLR2044 [*] Line with empty comment 20 20 | 21 21 | # the lines below have no comments and are OK +empty_comment.py:46:1: PLR2044 [*] Line with empty comment + | +44 | x = 1 +45 | +46 | # + | ^ PLR2044 +47 | # + | + = help: Delete the empty comment + +ℹ Safe fix +44 44 | x = 1 +45 45 | +46 46 | # +47 |-# + +empty_comment.py:47:1: PLR2044 [*] Line with empty comment + | +46 | # +47 | # + | ^ PLR2044 + | + = help: Delete the empty comment + +ℹ Safe fix +44 44 | x = 1 +45 45 | +46 46 | # +47 |-# + diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index c53466ede9e7a..4e2b2d97041eb 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -87,10 +87,14 @@ impl CommentRanges { let mut current_block_offset: Option = None; let mut current_block_non_empty = false; + let mut prev_line_end = None; + for comment_range in &self.raw { let offset = comment_range.start(); let line_start = locator.line_start(offset); + let line_end = locator.full_line_end(offset); + // If this is an end-of-line comment, reset the current block. if !Self::is_own_line(offset, locator) { // Push the current block, and reset. if current_block.len() > 1 && current_block_non_empty { @@ -100,12 +104,28 @@ impl CommentRanges { current_block_column = None; current_block_offset = None; current_block_non_empty = false; + prev_line_end = Some(line_end); continue; } - let line_end = locator.full_line_end(offset); - let column = offset - line_start; + // If there's a blank line between this comment and the previous + // comment, reset the current block. + if prev_line_end.is_some_and(|prev_line_end| { + locator.contains_line_break(TextRange::new(prev_line_end, line_start)) + }) { + // Push the current block, and reset. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } + current_block = vec![]; + current_block_column = None; + current_block_offset = None; + current_block_non_empty = false; + prev_line_end = Some(line_end); + continue; + } + let column = offset - line_start; if let Some(current_column) = current_block_column { if let Some(current_offset) = current_block_offset { if column == current_column && line_start == current_offset { @@ -134,6 +154,8 @@ impl CommentRanges { current_block_offset = Some(line_end); current_block_non_empty |= !Self::is_empty(*comment_range, locator); } + + prev_line_end = Some(line_end); } if current_block.len() > 1 && current_block_non_empty { From 6d78bd6b17e9ce0a14224f67b056bc4b72245f52 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 28 Dec 2023 21:48:52 -0500 Subject: [PATCH 6/6] Fix ranges --- .../test/fixtures/pylint/empty_comment.py | 14 ++-- ...lint__tests__PLR2044_empty_comment.py.snap | 66 +++++++++++-------- .../ruff_python_trivia/src/comment_ranges.rs | 58 ++++++++-------- 3 files changed, 76 insertions(+), 62 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py index 4d6f5e2d4687f..34e2cc0fb7f60 100644 --- a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py +++ b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py @@ -2,9 +2,9 @@ # this line is also OK, but the three following lines are not # # - # + # -# this non-empty comment has trailing whitespace and is OK +# this non-empty comment has trailing whitespace and is OK # Many codebases use multiple `#` characters on a single line to visually # separate sections of code, so we don't consider these empty comments. @@ -15,7 +15,7 @@ def foo(): # this comment is OK, the one below is not - pass # + pass # # the lines below have no comments and are OK @@ -36,12 +36,18 @@ def bar(): The following lines are all fine: # # - # + # """ # These should be removed, despite being an empty "block comment". +# +# + +# These should also be removed. + x = 1 # +## # diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap index 3c9f54e6abee9..4339810f22c21 100644 --- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap @@ -8,7 +8,7 @@ empty_comment.py:3:1: PLR2044 [*] Line with empty comment 3 | # | ^ PLR2044 4 | # -5 | # +5 | # | = help: Delete the empty comment @@ -17,7 +17,7 @@ empty_comment.py:3:1: PLR2044 [*] Line with empty comment 2 2 | # this line is also OK, but the three following lines are not 3 |-# 4 3 | # -5 4 | # +5 4 | # 6 5 | empty_comment.py:4:5: PLR2044 [*] Line with empty comment @@ -26,7 +26,7 @@ empty_comment.py:4:5: PLR2044 [*] Line with empty comment 3 | # 4 | # | ^ PLR2044 -5 | # +5 | # | = help: Delete the empty comment @@ -35,18 +35,18 @@ empty_comment.py:4:5: PLR2044 [*] Line with empty comment 2 2 | # this line is also OK, but the three following lines are not 3 3 | # 4 |- # -5 4 | # +5 4 | # 6 5 | -7 6 | # this non-empty comment has trailing whitespace and is OK +7 6 | # this non-empty comment has trailing whitespace and is OK empty_comment.py:5:9: PLR2044 [*] Line with empty comment | 3 | # 4 | # -5 | # - | ^^^^^ PLR2044 +5 | # + | ^ PLR2044 6 | -7 | # this non-empty comment has trailing whitespace and is OK +7 | # this non-empty comment has trailing whitespace and is OK | = help: Delete the empty comment @@ -54,16 +54,16 @@ empty_comment.py:5:9: PLR2044 [*] Line with empty comment 2 2 | # this line is also OK, but the three following lines are not 3 3 | # 4 4 | # -5 |- # +5 |- # 6 5 | -7 6 | # this non-empty comment has trailing whitespace and is OK +7 6 | # this non-empty comment has trailing whitespace and is OK 8 7 | empty_comment.py:18:11: PLR2044 [*] Line with empty comment | 17 | def foo(): # this comment is OK, the one below is not -18 | pass # - | ^^^^^ PLR2044 +18 | pass # + | ^ PLR2044 | = help: Delete the empty comment @@ -71,40 +71,48 @@ empty_comment.py:18:11: PLR2044 [*] Line with empty comment 15 15 | 16 16 | 17 17 | def foo(): # this comment is OK, the one below is not -18 |- pass # +18 |- pass # 18 |+ pass 19 19 | 20 20 | 21 21 | # the lines below have no comments and are OK -empty_comment.py:46:1: PLR2044 [*] Line with empty comment +empty_comment.py:44:1: PLR2044 [*] Line with empty comment | -44 | x = 1 -45 | -46 | # +42 | # These should be removed, despite being an empty "block comment". +43 | +44 | # | ^ PLR2044 -47 | # +45 | # | = help: Delete the empty comment ℹ Safe fix -44 44 | x = 1 -45 45 | -46 46 | # -47 |-# +42 42 | # These should be removed, despite being an empty "block comment". +43 43 | +44 44 | # +45 |-# +46 45 | +47 46 | # These should also be removed. +48 47 | -empty_comment.py:47:1: PLR2044 [*] Line with empty comment +empty_comment.py:45:1: PLR2044 [*] Line with empty comment | -46 | # -47 | # +44 | # +45 | # | ^ PLR2044 +46 | +47 | # These should also be removed. | = help: Delete the empty comment ℹ Safe fix -44 44 | x = 1 -45 45 | -46 46 | # -47 |-# +42 42 | # These should be removed, despite being an empty "block comment". +43 43 | +44 44 | # +45 |-# +46 45 | +47 46 | # These should also be removed. +48 47 | diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index 4e2b2d97041eb..02a42d9e2a41b 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -52,7 +52,7 @@ impl CommentRanges { /// Given a [`CommentRanges`], determine which comments are grouped together /// in "comment blocks". A "comment block" is a sequence of consecutive /// own-line comments in which the comment hash (`#`) appears in the same - /// column in each line. + /// column in each line, and at least one comment is non-empty. /// /// Returns a vector containing the offset of the leading hash (`#`) for /// each comment in any block comment. @@ -84,7 +84,6 @@ impl CommentRanges { let mut current_block: Vec = Vec::new(); let mut current_block_column: Option = None; - let mut current_block_offset: Option = None; let mut current_block_non_empty = false; let mut prev_line_end = None; @@ -93,6 +92,7 @@ impl CommentRanges { let offset = comment_range.start(); let line_start = locator.line_start(offset); let line_end = locator.full_line_end(offset); + let column = offset - line_start; // If this is an end-of-line comment, reset the current block. if !Self::is_own_line(offset, locator) { @@ -102,7 +102,6 @@ impl CommentRanges { } current_block = vec![]; current_block_column = None; - current_block_offset = None; current_block_non_empty = false; prev_line_end = Some(line_end); continue; @@ -113,51 +112,52 @@ impl CommentRanges { if prev_line_end.is_some_and(|prev_line_end| { locator.contains_line_break(TextRange::new(prev_line_end, line_start)) }) { - // Push the current block, and reset. + // Push the current block. if current_block.len() > 1 && current_block_non_empty { block_comments.extend(current_block); } - current_block = vec![]; - current_block_column = None; - current_block_offset = None; - current_block_non_empty = false; + + // Reset the block state. + current_block = vec![offset]; + current_block_column = Some(column); + current_block_non_empty = !Self::is_empty(*comment_range, locator); prev_line_end = Some(line_end); continue; } - let column = offset - line_start; if let Some(current_column) = current_block_column { - if let Some(current_offset) = current_block_offset { - if column == current_column && line_start == current_offset { - // Add the comment to the current block. - current_block.push(offset); - current_block_offset = Some(line_end); - current_block_non_empty |= !Self::is_empty(*comment_range, locator); - } else { - // Push the current block, and reset. - if current_block.len() > 1 && current_block_non_empty { - block_comments.extend(current_block); - } - current_block = vec![offset]; - current_block_column = Some(column); - current_block_offset = Some(line_end); - current_block_non_empty |= !Self::is_empty(*comment_range, locator); + if column == current_column { + // Add the comment to the current block. + current_block.push(offset); + current_block_non_empty |= !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); + } else { + // Push the current block. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); } + + // Reset the block state. + current_block = vec![offset]; + current_block_column = Some(column); + current_block_non_empty = !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); } } else { - // Push the current block, and reset. + // Push the current block. if current_block.len() > 1 && current_block_non_empty { block_comments.extend(current_block); } + + // Reset the block state. current_block = vec![offset]; current_block_column = Some(column); - current_block_offset = Some(line_end); - current_block_non_empty |= !Self::is_empty(*comment_range, locator); + current_block_non_empty = !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); } - - prev_line_end = Some(line_end); } + // Push any lingering blocks. if current_block.len() > 1 && current_block_non_empty { block_comments.extend(current_block); }