Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add checks for dataclasses #3877

Merged
merged 2 commits into from
Apr 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF008.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from dataclasses import dataclass, field

KNOWINGLY_MUTABLE_DEFAULT = []


@dataclass()
class A:
mutable_default: list[int] = []
without_annotation = []
ignored_via_comment: list[int] = [] # noqa: RUF008
correct_code: list[int] = KNOWINGLY_MUTABLE_DEFAULT
perfectly_fine: list[int] = field(default_factory=list)


@dataclass
class B:
mutable_default: list[int] = []
without_annotation = []
ignored_via_comment: list[int] = [] # noqa: RUF008
correct_code: list[int] = KNOWINGLY_MUTABLE_DEFAULT
perfectly_fine: list[int] = field(default_factory=list)
28 changes: 28 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF009.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from dataclasses import dataclass
from typing import NamedTuple


def default_function() -> list[int]:
return []


class ImmutableType(NamedTuple):
something: int = 8


@dataclass()
class A:
hidden_mutable_default: list[int] = default_function()


DEFAULT_IMMUTABLETYPE_FOR_ALL_DATACLASSES = ImmutableType(40)
DEFAULT_A_FOR_ALL_DATACLASSES = A([1, 2, 3])


@dataclass
class B:
hidden_mutable_default: list[int] = default_function()
another_dataclass: A = A()
not_optimal: ImmutableType = ImmutableType(20)
good_variant: ImmutableType = DEFAULT_IMMUTABLETYPE_FOR_ALL_DATACLASSES
okay_variant: A = DEFAULT_A_FOR_ALL_DATACLASSES
18 changes: 18 additions & 0 deletions crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,24 @@ where
flake8_pie::rules::non_unique_enums(self, stmt, body);
}

if self.settings.rules.any_enabled(&[
Rule::MutableDataclassDefault,
Rule::FunctionCallInDataclassDefaultArgument,
]) && ruff::rules::is_dataclass(self, decorator_list)
{
if self.settings.rules.enabled(Rule::MutableDataclassDefault) {
ruff::rules::mutable_dataclass_default(self, body);
}

if self
.settings
.rules
.enabled(Rule::FunctionCallInDataclassDefaultArgument)
{
ruff::rules::function_call_in_dataclass_defaults(self, body);
}
}

self.check_builtin_shadowing(name, stmt, false);

for expr in bases {
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,8 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<Rule> {
(Ruff, "005") => Rule::CollectionLiteralConcatenation,
(Ruff, "006") => Rule::AsyncioDanglingTask,
(Ruff, "007") => Rule::PairwiseOverZipped,
(Ruff, "008") => Rule::MutableDataclassDefault,
(Ruff, "009") => Rule::FunctionCallInDataclassDefaultArgument,
(Ruff, "100") => Rule::UnusedNOQA,

// flake8-django
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,8 @@ ruff_macros::register_rules!(
rules::ruff::rules::AsyncioDanglingTask,
rules::ruff::rules::UnusedNOQA,
rules::ruff::rules::PairwiseOverZipped,
rules::ruff::rules::MutableDataclassDefault,
rules::ruff::rules::FunctionCallInDataclassDefaultArgument,
// flake8-django
rules::flake8_django::rules::DjangoNullableModelStringField,
rules::flake8_django::rules::DjangoLocalsInRenderFunction,
Expand Down
12 changes: 12 additions & 0 deletions crates/ruff/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,16 @@ mod tests {
assert_yaml_snapshot!(diagnostics);
Ok(())
}

#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008.py"); "RUF008")]
#[test_case(Rule::FunctionCallInDataclassDefaultArgument, Path::new("RUF009.py"); "RUF009")]
fn mutable_defaults(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Path::new("ruff").join(path).as_path(),
&settings::Settings::for_rule(rule_code),
)?;
assert_yaml_snapshot!(snapshot, diagnostics);
Ok(())
}
}
5 changes: 5 additions & 0 deletions crates/ruff/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod ambiguous_unicode_character;
mod asyncio_dangling_task;
mod collection_literal_concatenation;
mod mutable_defaults_in_dataclass_fields;
mod pairwise_over_zipped;
mod unused_noqa;

Expand All @@ -12,6 +13,10 @@ pub use asyncio_dangling_task::{asyncio_dangling_task, AsyncioDanglingTask};
pub use collection_literal_concatenation::{
collection_literal_concatenation, CollectionLiteralConcatenation,
};
pub use mutable_defaults_in_dataclass_fields::{
function_call_in_dataclass_defaults, is_dataclass, mutable_dataclass_default,
FunctionCallInDataclassDefaultArgument, MutableDataclassDefault,
};
pub use pairwise_over_zipped::{pairwise_over_zipped, PairwiseOverZipped};
pub use unused_noqa::{UnusedCodes, UnusedNOQA};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{call_path::compose_call_path, helpers::map_callable, types::Range};
use ruff_python_semantic::context::Context;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for mutable default values in dataclasses without the use of
/// `dataclasses.field`.
///
/// ## Why is it bad?
/// Mutable default values share state across all instances of the dataclass,
/// while not being obvious. This can lead to bugs when the attributes are
/// changed in one instance, as those changes will unexpectedly affect all
/// other instances.
///
/// ## Examples:
/// ```python
/// from dataclasses import dataclass
///
/// @dataclass
/// class A:
/// mutable_default: list[int] = []
/// ```
///
/// Use instead:
/// ```python
/// from dataclasses import dataclass, field
///
/// @dataclass
/// class A:
/// mutable_default: list[int] = field(default_factory=list)
/// ```
///
/// Alternatively, if you _want_ shared behaviour, make it more obvious
/// by assigning to a module-level variable:
/// ```python
/// from dataclasses import dataclass
///
/// I_KNOW_THIS_IS_SHARED_STATE = [1, 2, 3, 4]
///
/// @dataclass
/// class A:
/// mutable_default: list[int] = I_KNOW_THIS_IS_SHARED_STATE
/// ```
#[violation]
pub struct MutableDataclassDefault;

impl Violation for MutableDataclassDefault {
#[derive_message_formats]
fn message(&self) -> String {
format!("Do not use mutable default values for dataclass attributes")
}
}

/// ## What it does
/// Checks for function calls in dataclass defaults.
///
/// ## Why is it bad?
/// Function calls are only performed once, at definition time. The returned
/// value is then reused by all instances of the dataclass.
///
/// ## Examples:
/// ```python
/// from dataclasses import dataclass
///
/// def creating_list() -> list[]:
/// return [1, 2, 3, 4]
///
/// @dataclass
/// class A:
/// mutable_default: list[int] = creating_list()
///
/// # also:
///
/// @dataclass
/// class B:
/// also_mutable_default_but_sneakier: A = A()
/// ```
///
/// Use instead:
/// ```python
/// from dataclasses import dataclass, field
///
/// def creating_list() -> list[]:
/// return [1, 2, 3, 4]
///
/// @dataclass
/// class A:
/// mutable_default: list[int] = field(default_factory=creating_list)
///
/// @dataclass
/// class B:
/// also_mutable_default_but_sneakier: A = field(default_factory=A)
/// ```
///
/// Alternatively, if you _want_ the shared behaviour, make it more obvious
/// by assigning it to a module-level variable:
/// ```python
/// from dataclasses import dataclass
///
/// def creating_list() -> list[]:
/// return [1, 2, 3, 4]
///
/// I_KNOW_THIS_IS_SHARED_STATE = creating_list()
///
/// @dataclass
/// class A:
/// mutable_default: list[int] = I_KNOW_THIS_IS_SHARED_STATE
/// ```
#[violation]
pub struct FunctionCallInDataclassDefaultArgument {
pub name: Option<String>,
}

impl Violation for FunctionCallInDataclassDefaultArgument {
#[derive_message_formats]
fn message(&self) -> String {
let FunctionCallInDataclassDefaultArgument { name } = self;
if let Some(name) = name {
format!("Do not perform function call `{name}` in dataclass defaults")
} else {
format!("Do not perform function call in dataclass defaults")
}
}
}

fn is_mutable_expr(expr: &Expr) -> bool {
matches!(
&expr.node,
ExprKind::List { .. }
| ExprKind::Dict { .. }
| ExprKind::Set { .. }
| ExprKind::ListComp { .. }
| ExprKind::DictComp { .. }
| ExprKind::SetComp { .. }
)
}

const ALLOWED_FUNCS: &[&[&str]] = &[&["dataclasses", "field"]];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to make this a setting, to allow users to allow certain functions here. But, we can wait on that, see if it's a real need that comes up in Issues.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually thought about that (mainly because the bugbear implementation has something similar), but I couldn't come up with something I would really want there. The only thing that is sensible in my mind at least, would be something like "registering" your NamedTuples as they are immutable or ignoring the error for one specific function.

So yeah personally I did leave it as "kinda prepared if necessary, but only if needed in the future" :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, now that I've revisited that thought, it could also match the IMMUTABLE_FUNCS from here (function_call_argument_default.rs) + the added dataclass-specific dataclass.field one.

I'm not sure if you want to do that as it would probably mean exporting one or the other (or both) and having configuration options for two (very far apart) rule checkers in one of them; or duplicating the array on both sides.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also this would have implications for "user-added" things. As "sharing" the list would mean the user adds a to-ignore function to the flake8-bugbear-config and it gets ignored in the dataclass as well.

Therefore I'm leaning more to a duplicate of the array at the moment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm okay to skip for now. Let's see if it comes up. If you do feel like implementing, my gut reaction is just to duplicate for now, but there's not great precedent to follow (it's odd to have two different configuration options for what are really very similar rules, but we also tend to avoid sharing configuration across "sub-linters").


fn is_allowed_func(context: &Context, func: &Expr) -> bool {
context.resolve_call_path(func).map_or(false, |call_path| {
ALLOWED_FUNCS
.iter()
.any(|target| call_path.as_slice() == *target)
})
}

/// RUF009
pub fn function_call_in_dataclass_defaults(checker: &mut Checker, body: &[Stmt]) {
mosauter marked this conversation as resolved.
Show resolved Hide resolved
for statement in body {
if let StmtKind::AnnAssign {
value: Some(expr), ..
} = &statement.node
{
if let ExprKind::Call { func, .. } = &expr.node {
if !is_allowed_func(&checker.ctx, func) {
checker.diagnostics.push(Diagnostic::new(
FunctionCallInDataclassDefaultArgument {
name: compose_call_path(func),
},
Range::from(expr),
));
}
}
}
}
}

/// RUF008
pub fn mutable_dataclass_default(checker: &mut Checker, body: &[Stmt]) {
for statement in body {
if let StmtKind::AnnAssign {
value: Some(value), ..
}
| StmtKind::Assign { value, .. } = &statement.node
{
if is_mutable_expr(value) {
checker
.diagnostics
.push(Diagnostic::new(MutableDataclassDefault, Range::from(value)));
}
}
}
}

pub fn is_dataclass(checker: &Checker, decorator_list: &[Expr]) -> bool {
decorator_list.iter().any(|decorator| {
checker
.ctx
.resolve_call_path(map_callable(decorator))
.map_or(false, |call_path| {
call_path.as_slice() == ["dataclasses", "dataclass"]
})
})
}
mosauter marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
source: crates/ruff/src/rules/ruff/mod.rs
expression: diagnostics
---
- kind:
name: MutableDataclassDefault
body: Do not use mutable default values for dataclass attributes
suggestion: ~
fixable: false
location:
row: 8
column: 33
end_location:
row: 8
column: 35
fix:
edits: []
parent: ~
- kind:
name: MutableDataclassDefault
body: Do not use mutable default values for dataclass attributes
suggestion: ~
fixable: false
location:
row: 9
column: 25
end_location:
row: 9
column: 27
fix:
edits: []
parent: ~
- kind:
name: MutableDataclassDefault
body: Do not use mutable default values for dataclass attributes
suggestion: ~
fixable: false
location:
row: 17
column: 33
end_location:
row: 17
column: 35
fix:
edits: []
parent: ~
- kind:
name: MutableDataclassDefault
body: Do not use mutable default values for dataclass attributes
suggestion: ~
fixable: false
location:
row: 18
column: 25
end_location:
row: 18
column: 27
fix:
edits: []
parent: ~

Loading