Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pylint] Implement consider-dict-items (C0206) #11688

Merged
merged 19 commits into from
Jun 6, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
ORCHESTRA = {
"violin": "strings",
"oboe": "woodwind",
"tuba": "brass",
"gong": "percussion",
}

# Errors
for instrument in ORCHESTRA:
print(f"{instrument}: {ORCHESTRA[instrument]}")

for instrument in ORCHESTRA:
ORCHESTRA[instrument]

for instrument in ORCHESTRA.keys():
print(f"{instrument}: {ORCHESTRA[instrument]}")

for instrument in ORCHESTRA.keys():
ORCHESTRA[instrument]

for instrument in (temp_orchestra := {"violin": "strings", "oboe": "woodwind"}):
print(f"{instrument}: {temp_orchestra[instrument]}")

for instrument in (temp_orchestra := {"violin": "strings", "oboe": "woodwind"}):
temp_orchestra[instrument]

# # OK
for instrument, section in ORCHESTRA.items():
print(f"{instrument}: {section}")

for instrument, section in ORCHESTRA.items():
section

for instrument, section in (
temp_orchestra := {"violin": "strings", "oboe": "woodwind"}
).items():
print(f"{instrument}: {section}")

for instrument, section in (
temp_orchestra := {"violin": "strings", "oboe": "woodwind"}
).items():
section

for instrument in ORCHESTRA:
ORCHESTRA[instrument] = 3


# Shouldn't trigger for non-dict types
items = {1, 2, 3, 4}
for i in items:
items[i]

items = [1, 2, 3, 4]
for i in items:
items[i]
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use ruff_python_ast::Stmt;

use crate::checkers::ast::Checker;
use crate::codes::Rule;
use crate::rules::{flake8_bugbear, flake8_simplify, perflint, pyupgrade, refurb};
use crate::rules::{flake8_bugbear, flake8_simplify, perflint, pylint, pyupgrade, refurb};

/// Run lint rules over all deferred for-loops in the [`SemanticModel`].
pub(crate) fn deferred_for_loops(checker: &mut Checker) {
Expand Down Expand Up @@ -33,6 +33,9 @@ pub(crate) fn deferred_for_loops(checker: &mut Checker) {
if checker.enabled(Rule::LoopIteratorMutation) {
flake8_bugbear::rules::loop_iterator_mutation(checker, stmt_for);
}
if checker.enabled(Rule::DictIndexMissingItems) {
pylint::rules::dict_index_missing_items(checker, stmt_for);
}
}
}
}
1 change: 1 addition & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
pylint::rules::too_many_nested_blocks(checker, stmt);
}
if checker.any_enabled(&[
Rule::DictIndexMissingItems,
Rule::EnumerateForLoop,
Rule::IncorrectDictIterator,
Rule::LoopIteratorMutation,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Pylint, "C0131") => (RuleGroup::Stable, rules::pylint::rules::TypeBivariance),
(Pylint, "C0132") => (RuleGroup::Stable, rules::pylint::rules::TypeParamNameMismatch),
(Pylint, "C0205") => (RuleGroup::Stable, rules::pylint::rules::SingleStringSlots),
(Pylint, "C0206") => (RuleGroup::Preview, rules::pylint::rules::DictIndexMissingItems),
(Pylint, "C0208") => (RuleGroup::Stable, rules::pylint::rules::IterationOverSet),
(Pylint, "C0414") => (RuleGroup::Stable, rules::pylint::rules::UselessImportAlias),
(Pylint, "C0415") => (RuleGroup::Preview, rules::pylint::rules::ImportOutsideTopLevel),
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ mod tests {
#[test_case(Rule::SuperWithoutBrackets, Path::new("super_without_brackets.py"))]
#[test_case(Rule::SelfOrClsAssignment, Path::new("self_or_cls_assignment.py"))]
#[test_case(Rule::TooManyNestedBlocks, Path::new("too_many_nested_blocks.py"))]
#[test_case(Rule::DictIndexMissingItems, Path::new("dict_index_missing_items.py"))]
#[test_case(Rule::DictIterMissingItems, Path::new("dict_iter_missing_items.py"))]
#[test_case(
Rule::UnnecessaryDictIndexLookup,
Expand Down
182 changes: 182 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/dict_index_missing_items.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::comparable::ComparableExpr;
use ruff_python_ast::{
self as ast,
visitor::{self, Visitor},
Expr, ExprContext,
};
use ruff_python_semantic::analyze::type_inference::{PythonType, ResolvedPythonType};
use ruff_python_semantic::analyze::typing::is_dict;
use ruff_python_semantic::SemanticModel;
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for dictionary iterations that extract the dictionary value
/// via explicit indexing, instead of using `.items()`.
///
/// ## Why is this bad?
/// Iterating over a dictionary with `.items()` is semantically clearer
/// and more efficient than extracting the value with the key.
///
/// ## Example
/// ```python
/// ORCHESTRA = {
/// "violin": "strings",
/// "oboe": "woodwind",
/// "tuba": "brass",
/// "gong": "percussion",
/// }
///
/// for instrument in ORCHESTRA:
/// print(f"{instrument}: {ORCHESTRA[instrument]}")
/// ```
///
/// Use instead:
/// ```python
/// ORCHESTRA = {
/// "violin": "strings",
/// "oboe": "woodwind",
/// "tuba": "brass",
/// "gong": "percussion",
/// }
///
/// for instrument, section in ORCHESTRA.items():
/// print(f"{instrument}: {section}")
/// ```

#[violation]
pub struct DictIndexMissingItems;

impl Violation for DictIndexMissingItems {
#[derive_message_formats]
fn message(&self) -> String {
format!("Extracting value from dictionary without calling `.items()`")
}
}

/// PLC0206
pub(crate) fn dict_index_missing_items(checker: &mut Checker, stmt_for: &ast::StmtFor) {
let ast::StmtFor {
target, iter, body, ..
} = stmt_for;

// Extract the name of the iteration object (e.g., `obj` in `for key in obj:`).
let Some(dict_name) = extract_dict_name(iter) else {
return;
};

// Determine if the right-hand side is a dictionary literal (i.e. `for key in (dict := {"a": 1}):`).
let is_dict_literal = matches!(
ResolvedPythonType::from(&**iter),
ResolvedPythonType::Atom(PythonType::Dict),
);

if !is_dict_literal && !is_inferred_dict(dict_name, checker.semantic()) {
return;
}

let has_violation = {
let mut visitor = SubscriptVisitor::new(target, dict_name);
for stmt in body {
visitor.visit_stmt(stmt);
}
visitor.has_violation
};

if has_violation {
let diagnostic = Diagnostic::new(DictIndexMissingItems, stmt_for.range());
checker.diagnostics.push(diagnostic);
}
}

/// A visitor to detect subscript operations on a target dictionary.
struct SubscriptVisitor<'a> {
/// The target of the for loop (e.g., `key` in `for key in obj:`).
target: &'a Expr,
/// The name of the iterated object (e.g., `obj` in `for key in obj:`).
dict_name: &'a ast::ExprName,
/// Whether a violation has been detected.
has_violation: bool,
}

impl<'a> SubscriptVisitor<'a> {
fn new(target: &'a Expr, dict_name: &'a ast::ExprName) -> Self {
Self {
target,
dict_name,
has_violation: false,
}
}
}

impl<'a> Visitor<'a> for SubscriptVisitor<'a> {
fn visit_expr(&mut self, expr: &'a Expr) {
// Given `obj[key]`, `value` must be `obj` and `slice` must be `key`.
if let Expr::Subscript(ast::ExprSubscript {
value,
slice,
ctx: ExprContext::Load,
..
}) = expr
{
let Expr::Name(name) = value.as_ref() else {
return;
};

// Check that the sliced dictionary name is the same as the iterated object name.
if name.id != self.dict_name.id {
return;
}

// Check that the sliced value is the same as the target of the `for` loop.
if ComparableExpr::from(slice) != ComparableExpr::from(self.target) {
return;
}

self.has_violation = true;
} else {
visitor::walk_expr(self, expr);
}
}
}

/// Extracts the name of the dictionary from the expression.
fn extract_dict_name(expr: &Expr) -> Option<&ast::ExprName> {
// Ex) `for key in obj:`
if let Some(name_expr) = expr.as_name_expr() {
return Some(name_expr);
}

// Ex) `for key in obj.keys():`
if let Expr::Call(ast::ExprCall { func, .. }) = expr {
if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = func.as_ref() {
if attr == "keys" {
if let Expr::Name(var_name) = value.as_ref() {
return Some(var_name);
}
}
}
}

// Ex) `for key in (my_dict := {"foo": "bar"}):`
if let Expr::Named(ast::ExprNamed { target, value, .. }) = expr {
if let Expr::Dict(ast::ExprDict { .. }) = value.as_ref() {
if let Expr::Name(var_name) = target.as_ref() {
return Some(var_name);
}
}
}

None
}

/// Returns `true` if the binding is a dictionary, inferred from the type.
fn is_inferred_dict(name: &ast::ExprName, semantic: &SemanticModel) -> bool {
semantic
.only_binding(name)
.map(|id| semantic.binding(id))
.is_some_and(|binding| is_dict(binding, semantic))
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub(crate) fn dict_iter_missing_items(checker: &mut Checker, target: &Expr, iter
}

// If we can reliably determine that a dictionary has keys that are tuples of two we don't warn
if is_dict_key_tuple_with_two_elements(checker.semantic(), binding) {
if is_dict_key_tuple_with_two_elements(binding, checker.semantic()) {
return;
}

Expand All @@ -86,7 +86,7 @@ pub(crate) fn dict_iter_missing_items(checker: &mut Checker, target: &Expr, iter
}

/// Returns true if the binding is a dictionary where each key is a tuple with two elements.
fn is_dict_key_tuple_with_two_elements(semantic: &SemanticModel, binding: &Binding) -> bool {
fn is_dict_key_tuple_with_two_elements(binding: &Binding, semantic: &SemanticModel) -> bool {
let Some(statement) = binding.statement(semantic) else {
return false;
};
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub(crate) use compare_to_empty_string::*;
pub(crate) use comparison_of_constant::*;
pub(crate) use comparison_with_itself::*;
pub(crate) use continue_in_finally::*;
pub(crate) use dict_index_missing_items::*;
pub(crate) use dict_iter_missing_items::*;
pub(crate) use duplicate_bases::*;
pub(crate) use empty_comment::*;
Expand Down Expand Up @@ -116,6 +117,7 @@ mod compare_to_empty_string;
mod comparison_of_constant;
mod comparison_with_itself;
mod continue_in_finally;
mod dict_index_missing_items;
mod dict_iter_missing_items;
mod duplicate_bases;
mod empty_comment;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
---
source: crates/ruff_linter/src/rules/pylint/mod.rs
---
dict_index_missing_items.py:9:1: PLC0206 Extracting value from dictionary without calling `.items()`
|
8 | # Errors
9 | / for instrument in ORCHESTRA:
10 | | print(f"{instrument}: {ORCHESTRA[instrument]}")
| |___________________________________________________^ PLC0206
11 |
12 | for instrument in ORCHESTRA:
Comment on lines +4 to +11
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be quite useful to have a infrastructure to provide additional details here similar to Biome. That would avoid highlighting the entire for statement and instead we could highlight individual pieces of code with details.

|

dict_index_missing_items.py:12:1: PLC0206 Extracting value from dictionary without calling `.items()`
|
10 | print(f"{instrument}: {ORCHESTRA[instrument]}")
11 |
12 | / for instrument in ORCHESTRA:
13 | | ORCHESTRA[instrument]
| |_________________________^ PLC0206
14 |
15 | for instrument in ORCHESTRA.keys():
|

dict_index_missing_items.py:15:1: PLC0206 Extracting value from dictionary without calling `.items()`
|
13 | ORCHESTRA[instrument]
14 |
15 | / for instrument in ORCHESTRA.keys():
16 | | print(f"{instrument}: {ORCHESTRA[instrument]}")
| |___________________________________________________^ PLC0206
17 |
18 | for instrument in ORCHESTRA.keys():
|

dict_index_missing_items.py:18:1: PLC0206 Extracting value from dictionary without calling `.items()`
|
16 | print(f"{instrument}: {ORCHESTRA[instrument]}")
17 |
18 | / for instrument in ORCHESTRA.keys():
19 | | ORCHESTRA[instrument]
| |_________________________^ PLC0206
20 |
21 | for instrument in (temp_orchestra := {"violin": "strings", "oboe": "woodwind"}):
|

dict_index_missing_items.py:21:1: PLC0206 Extracting value from dictionary without calling `.items()`
|
19 | ORCHESTRA[instrument]
20 |
21 | / for instrument in (temp_orchestra := {"violin": "strings", "oboe": "woodwind"}):
22 | | print(f"{instrument}: {temp_orchestra[instrument]}")
| |________________________________________________________^ PLC0206
23 |
24 | for instrument in (temp_orchestra := {"violin": "strings", "oboe": "woodwind"}):
|

dict_index_missing_items.py:24:1: PLC0206 Extracting value from dictionary without calling `.items()`
|
22 | print(f"{instrument}: {temp_orchestra[instrument]}")
23 |
24 | / for instrument in (temp_orchestra := {"violin": "strings", "oboe": "woodwind"}):
25 | | temp_orchestra[instrument]
| |______________________________^ PLC0206
26 |
27 | # # OK
|
1 change: 1 addition & 0 deletions ruff.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading