Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pylint] unspecified-encoding (W1514) #3416

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions crates/ruff/resources/test/fixtures/pylint/unspecified_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""Warnings for using open() without specifying an encoding"""
import dataclasses
import io
import locale
from pathlib import Path
from typing import Optional

FILENAME = "foo.bar"
open(FILENAME, "w", encoding="utf-8")
open(FILENAME, "wb")
open(FILENAME, "w+b")
open(FILENAME) # [unspecified-encoding]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One issue here is that this rule is in conflict with UP015, which removes redundant open modes (so, e.g., it changes open("foo", "r") to open("foo")). We'd have to mark these rules as incompatible in the INCOMPATIBLE_CODES slice in crates/ruff/src/registry.rs.

open(FILENAME, "wt") # [unspecified-encoding]
open(FILENAME, "w+") # [unspecified-encoding]
open(FILENAME, "w", encoding=None) # [unspecified-encoding]
open(FILENAME, "r") # [unspecified-encoding]

with open(FILENAME, encoding="utf8", errors="surrogateescape") as f:
pass

LOCALE_ENCODING = locale.getlocale()[1]
with open(FILENAME, encoding=LOCALE_ENCODING) as f:
pass

with open(FILENAME) as f: # [unspecified-encoding]
pass

with open(FILENAME, encoding=None) as f: # [unspecified-encoding]
pass

LOCALE_ENCODING = None
with open(FILENAME, encoding=LOCALE_ENCODING) as f: # [unspecified-encoding]
pass

io.open(FILENAME, "w+b")
io.open_code(FILENAME)
io.open(FILENAME) # [unspecified-encoding]
io.open(FILENAME, "wt") # [unspecified-encoding]
io.open(FILENAME, "w+") # [unspecified-encoding]
io.open(FILENAME, "w", encoding=None) # [unspecified-encoding]

with io.open(FILENAME, encoding="utf8", errors="surrogateescape") as f:
pass

LOCALE_ENCODING = locale.getlocale()[1]
with io.open(FILENAME, encoding=LOCALE_ENCODING) as f:
pass

with io.open(FILENAME) as f: # [unspecified-encoding]
pass

with io.open(FILENAME, encoding=None) as f: # [unspecified-encoding]
pass

LOCALE_ENCODING = None
with io.open(FILENAME, encoding=LOCALE_ENCODING) as f: # [unspecified-encoding]
pass

LOCALE_ENCODING = locale.getlocale()[1]
Path(FILENAME).read_text(encoding=LOCALE_ENCODING)
Path(FILENAME).read_text(encoding="utf8")
Path(FILENAME).read_text("utf8")

LOCALE_ENCODING = None
Path(FILENAME).read_text() # [unspecified-encoding]
Path(FILENAME).read_text(encoding=None) # [unspecified-encoding]
Path(FILENAME).read_text(encoding=LOCALE_ENCODING) # [unspecified-encoding]

LOCALE_ENCODING = locale.getlocale()[1]
Path(FILENAME).write_text("string", encoding=LOCALE_ENCODING)
Path(FILENAME).write_text("string", encoding="utf8")

LOCALE_ENCODING = None
Path(FILENAME).write_text("string") # [unspecified-encoding]
Path(FILENAME).write_text("string", encoding=None) # [unspecified-encoding]
Path(FILENAME).write_text("string", encoding=LOCALE_ENCODING) # [unspecified-encoding]

LOCALE_ENCODING = locale.getlocale()[1]
Path(FILENAME).open("w+b")
Path(FILENAME).open() # [unspecified-encoding]
Path(FILENAME).open("wt") # [unspecified-encoding]
Path(FILENAME).open("w+") # [unspecified-encoding]
Path(FILENAME).open("w", encoding=None) # [unspecified-encoding]
Path(FILENAME).open("w", encoding=LOCALE_ENCODING)


# Tests for storing data about open calls.
# Most of these are regression tests for a crash
# reported in https://github.com/PyCQA/pylint/issues/5321

# -- Constants
MODE = "wb"
open(FILENAME, mode=MODE)


# -- Functions
def return_mode_function():
"""Return a mode for open call"""
return "wb"

open(FILENAME, mode=return_mode_function())


# -- Classes
class IOData:
"""Class that returns mode strings"""

mode = "wb"

def __init__(self):
self.my_mode = "wb"

@staticmethod
def my_mode_method():
"""Returns a pre-defined mode"""
return "wb"

@staticmethod
def my_mode_method_returner(mode: str) -> str:
"""Returns the supplied mode"""
return mode


open(FILENAME, mode=IOData.mode)
open(FILENAME, mode=IOData().my_mode)
open(FILENAME, mode=IOData().my_mode_method())
open(FILENAME, mode=IOData().my_mode_method_returner("wb"))
# Invalid value but shouldn't crash, reported in https://github.com/PyCQA/pylint/issues/5321
open(FILENAME, mode=IOData)


# -- Dataclasses
@dataclasses.dataclass
class IOArgs:
"""Dataclass storing information about how to open a file"""

encoding: Optional[str]
mode: str


args_good_one = IOArgs(encoding=None, mode="wb")

# Test for crash reported in https://github.com/PyCQA/pylint/issues/5321
open(FILENAME, args_good_one.mode, encoding=args_good_one.encoding)

# Positional arguments
open(FILENAME, "w", -1, "utf-8")
open(FILENAME, "w", -1) # [unspecified-encoding]

Path(FILENAME).open("w", -1, "utf-8")
Path(FILENAME).open("w", -1) # [unspecified-encoding]

Path(FILENAME).read_text("utf-8")
Path(FILENAME).read_text() # [unspecified-encoding]

Path(FILENAME).write_text("string", "utf-8")
Path(FILENAME).write_text("string") # [unspecified-encoding]

# Test for crash reported in https://github.com/PyCQA/pylint/issues/5731
open(FILENAME, mode=None) # [bad-open-mode, unspecified-encoding]

# Test for crash reported in https://github.com/PyCQA/pylint/issues/6414
open('foo', mode=2) # [bad-open-mode, unspecified-encoding]
4 changes: 4 additions & 0 deletions crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2866,6 +2866,10 @@ where
pylint::rules::bad_str_strip_call(self, func, args);
}

if self.settings.rules.enabled(&Rule::UnspecifiedEncoding) {
pylint::rules::unspecified_encoding(self, func, args, keywords);
}

// flake8-pytest-style
if self.settings.rules.enabled(&Rule::PatchWithLambda) {
if let Some(diagnostic) =
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<Rule> {
(Pylint, "R0912") => Rule::TooManyBranches,
(Pylint, "R0915") => Rule::TooManyStatements,
(Pylint, "W2901") => Rule::RedefinedLoopName,
(Pylint, "W1514") => Rule::UnspecifiedEncoding,

// flake8-builtins
(Flake8Builtins, "001") => Rule::BuiltinVariableShadowing,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ ruff_macros::register_rules!(
rules::pylint::rules::RedefinedLoopName,
rules::pylint::rules::LoggingTooFewArgs,
rules::pylint::rules::LoggingTooManyArgs,
rules::pylint::rules::UnspecifiedEncoding,
// flake8-builtins
rules::flake8_builtins::rules::BuiltinVariableShadowing,
rules::flake8_builtins::rules::BuiltinArgumentShadowing,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ mod tests {
#[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"); "PLE01310")]
#[test_case(Rule::YieldInInit, Path::new("yield_in_init.py"); "PLE0100")]
#[test_case(Rule::RedefinedLoopName, Path::new("redefined_loop_name.py"); "PLW2901")]
#[test_case(Rule::UnspecifiedEncoding, Path::new("unspecified_encoding.py"); "PLW1514")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/pylint/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub use too_many_statements::{too_many_statements, TooManyStatements};
pub use unnecessary_direct_lambda_call::{
unnecessary_direct_lambda_call, UnnecessaryDirectLambdaCall,
};
pub use unspecified_encoding::{unspecified_encoding, UnspecifiedEncoding};
pub use use_from_import::{use_from_import, ConsiderUsingFromImport};
pub use used_prior_global_declaration::{
used_prior_global_declaration, UsedPriorGlobalDeclaration,
Expand Down Expand Up @@ -54,6 +55,7 @@ mod too_many_branches;
mod too_many_return_statements;
mod too_many_statements;
mod unnecessary_direct_lambda_call;
mod unspecified_encoding;
mod use_from_import;
mod used_prior_global_declaration;
mod useless_else_on_loop;
Expand Down
98 changes: 98 additions & 0 deletions crates/ruff/src/rules/pylint/rules/unspecified_encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use ruff_macros::{derive_message_formats, violation};
use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword};

use crate::checkers::ast::Checker;
use crate::registry::Diagnostic;
use crate::violation::Violation;
use ruff_python_ast::helpers::SimpleCallArgs;
use ruff_python_ast::types::Range;

/// W1514: Using open without explicitly specifying an encoding
/// (unspecified-encoding)
///
/// ## What it does
/// Checks for a valid encoding field when opening files.
///
/// ## Why is this bad?
/// It is better to specify an encoding when opening documents.
/// Using the system default implicitly can create problems on other operating systems.
/// See https://peps.python.org/pep-0597/
///
/// ## Example
/// ```python
/// def foo(file_path):
/// with open(file_path) as file: # [unspecified-encoding]
/// contents = file.read()
/// ```
///
/// Use instead:
/// ```python
/// def foo(file_path):
/// with open(file_path, encoding="utf-8") as file:
/// contents = file.read()
/// ```
#[violation]
pub struct UnspecifiedEncoding;

impl Violation for UnspecifiedEncoding {
#[derive_message_formats]
fn message(&self) -> String {
format!("Using open without explicitly specifying an encoding")
}
}

const OPEN_FUNC_NAME: &str = "open";
const ENCODING_MODE_ARGUMENT: &str = "mode";
const ENCODING_KEYWORD_ARGUMENT: &str = "encoding";

/// W1514
pub fn unspecified_encoding(
checker: &mut Checker,
func: &Expr,
args: &[Expr],
keywords: &[Keyword],
) {
// If `open` has been rebound, skip this check entirely.
if !checker.ctx.is_builtin(OPEN_FUNC_NAME) {
return;
}
// Look for open().
if !matches!(&func.node, ExprKind::Name {id, ..} if id == OPEN_FUNC_NAME) {
return;
}
Comment on lines +55 to +62
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I recommend switching the is_builtin check and testing the function name because testing is_builtin is more expensive than comparing two strings

Suggested change
// If `open` has been rebound, skip this check entirely.
if !checker.ctx.is_builtin(OPEN_FUNC_NAME) {
return;
}
// Look for open().
if !matches!(&func.node, ExprKind::Name {id, ..} if id == OPEN_FUNC_NAME) {
return;
}
// Look for open().
if !matches!(&func.node, ExprKind::Name {id, ..} if id == OPEN_FUNC_NAME) {
return;
}
// If `open` has been rebound, skip this check entirely.
if !checker.ctx.is_builtin(OPEN_FUNC_NAME) {
return;
}


// If the mode arg has "b", skip this check.
let call_args = SimpleCallArgs::new(args, keywords);
let mode_arg = call_args.get_argument(ENCODING_MODE_ARGUMENT, Some(1));
if let Some(mode) = mode_arg {
if let ExprKind::Constant {
value: Constant::Str(mode_param_value),
..
} = &mode.node
{
if mode_param_value.as_str().contains('b') {
return;
}
}
}

// Check encoding for missing or None values.
let encoding_arg = call_args.get_argument(ENCODING_KEYWORD_ARGUMENT, Some(3));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I recommend inlining the ENCODING_KEYWORD_ARGUMENT constant because it is only used once and it can break the reading flow if I have to jump to the top to understand what the value of the constant is.

if let Some(keyword) = encoding_arg {
// encoding=None
if let ExprKind::Constant {
value: Constant::None,
..
} = &keyword.node
{
checker
.diagnostics
.push(Diagnostic::new(UnspecifiedEncoding, Range::from(func)));
}
} else {
// Encoding not found
checker
.diagnostics
.push(Diagnostic::new(UnspecifiedEncoding, Range::from(func)));
}
}
Loading