Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/ruff_db/src/diagnostic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use ruff_annotate_snippets::Level as AnnotateLevel;
use ruff_text_size::{Ranged, TextRange, TextSize};

pub use self::render::{
DisplayDiagnostic, DisplayDiagnostics, FileResolver, Input, ceil_char_boundary,
DisplayDiagnostic, DisplayDiagnostics, DummyFileResolver, FileResolver, Input,
ceil_char_boundary,
github::{DisplayGithubDiagnostics, GithubRenderer},
};
use crate::{Db, files::File};
Expand Down
25 changes: 25 additions & 0 deletions crates/ruff_db/src/diagnostic/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,31 @@ pub fn ceil_char_boundary(text: &str, offset: TextSize) -> TextSize {
.unwrap_or_else(|| TextSize::from(upper_bound))
}

/// A stub implementation of [`FileResolver`] intended for testing.
pub struct DummyFileResolver;

impl FileResolver for DummyFileResolver {
fn path(&self, _file: File) -> &str {
unimplemented!()
}

fn input(&self, _file: File) -> Input {
unimplemented!()
}

fn notebook_index(&self, _file: &UnifiedFile) -> Option<NotebookIndex> {
None
}

fn is_notebook(&self, _file: &UnifiedFile) -> bool {
false
}

fn current_directory(&self) -> &Path {
Path::new(".")
}
}

#[cfg(test)]
mod tests {

Expand Down
3 changes: 1 addition & 2 deletions crates/ruff_linter/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ use crate::settings::types::CompiledPerFileIgnoreList;
pub fn get_cwd() -> &'static Path {
#[cfg(target_arch = "wasm32")]
{
static CWD: std::sync::LazyLock<PathBuf> = std::sync::LazyLock::new(|| PathBuf::from("."));
&CWD
Path::new(".")
}
#[cfg(not(target_arch = "wasm32"))]
path_absolutize::path_dedot::CWD.as_path()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,8 @@
f'{1: abcd "{1}" }'
f'{1: abcd "{'aa'}" }'
f'{1=: "abcd {'aa'}}'
# FIXME(brent) This should not be a syntax error on output. The escaped quotes are in the format
# spec, which is valid even before 3.12.
f'{x:a{z:hy "user"}} \'\'\''

# Changing the outer quotes is fine because the format-spec is in a nested expression.
Expand Down
172 changes: 136 additions & 36 deletions crates/ruff_python_formatter/tests/fixtures.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
use crate::normalizer::Normalizer;
use itertools::Itertools;
use ruff_db::diagnostic::{
Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig,
DisplayDiagnostics, DummyFileResolver, Severity, Span, SubDiagnostic, SubDiagnosticSeverity,
};
use ruff_formatter::FormatOptions;
use ruff_python_ast::Mod;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_formatter::{PreviewMode, PyFormatOptions, format_module_source, format_range};
use ruff_python_parser::{ParseOptions, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_python_parser::{ParseOptions, Parsed, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed, SourceFileBuilder};
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
Expand Down Expand Up @@ -193,7 +198,8 @@ fn format() {
writeln!(snapshot, "## Outputs").unwrap();

for (i, options) in options.into_iter().enumerate() {
let formatted_code = format_file(&content, &options, input_path);
let (formatted_code, unsupported_syntax_errors) =
format_file(&content, &options, input_path);

writeln!(
snapshot,
Expand All @@ -210,7 +216,7 @@ fn format() {

// We want to capture the differences in the preview style in our fixtures
let options_preview = options.with_preview(PreviewMode::Enabled);
let formatted_preview = format_file(&content, &options_preview, input_path);
let (formatted_preview, _) = format_file(&content, &options_preview, input_path);

if formatted_code != formatted_preview {
// Having both snapshots makes it hard to see the difference, so we're keeping only
Expand All @@ -227,14 +233,28 @@ fn format() {
)
.unwrap();
}

if !unsupported_syntax_errors.is_empty() {
writeln!(
snapshot,
"### Unsupported Syntax Errors\n{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
}
} else {
// We want to capture the differences in the preview style in our fixtures
let options = PyFormatOptions::from_extension(input_path);
let formatted_code = format_file(&content, &options, input_path);
let (formatted_code, unsupported_syntax_errors) =
format_file(&content, &options, input_path);

let options_preview = options.with_preview(PreviewMode::Enabled);
let formatted_preview = format_file(&content, &options_preview, input_path);
let (formatted_preview, _) = format_file(&content, &options_preview, input_path);

if formatted_code == formatted_preview {
writeln!(
Expand All @@ -259,6 +279,19 @@ fn format() {
)
.unwrap();
}

if !unsupported_syntax_errors.is_empty() {
writeln!(
snapshot,
"## Unsupported Syntax Errors\n{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
}

insta::with_settings!({
Expand All @@ -277,7 +310,11 @@ fn format() {
);
}

fn format_file(source: &str, options: &PyFormatOptions, input_path: &Path) -> String {
fn format_file(
source: &str,
options: &PyFormatOptions,
input_path: &Path,
) -> (String, Vec<Diagnostic>) {
let (unformatted, formatted_code) = if source.contains("<RANGE_START>") {
let mut content = source.to_string();
let without_markers = content
Expand Down Expand Up @@ -337,9 +374,10 @@ fn format_file(source: &str, options: &PyFormatOptions, input_path: &Path) -> St
(Cow::Borrowed(source), formatted_code)
};

ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);
let unsupported_syntax_errors =
ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);

formatted_code
(formatted_code, unsupported_syntax_errors)
}

/// Format another time and make sure that there are no changes anymore
Expand Down Expand Up @@ -391,12 +429,23 @@ Formatted twice:
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
/// and can also flatten tuples within `del` statements.
///
/// Returns any new [`UnsupportedSyntaxError`]s in the formatted code as [`Diagnostic`]s for
/// snapshotting.
///
/// As noted in the sub-diagnostic message, new syntax errors should only be accepted when they are
/// the result of an existing syntax error in the input. For example, the formatter knows that
/// escapes in f-strings are only allowed after Python 3.12, so it can replace escaped quotes with
/// reused outer quote characters, which are also valid after 3.12, even if the configured Python
/// version is lower. Such cases disrupt the fingerprint filter because the syntax error, and thus
/// its fingerprint, is different from the input syntax error. More typical cases like using a
/// t-string before 3.14 will be filtered out and not included in snapshots.
fn ensure_unchanged_ast(
unformatted_code: &str,
formatted_code: &str,
options: &PyFormatOptions,
input_path: &Path,
) {
) -> Vec<Diagnostic> {
let source_type = options.source_type();

// Parse the unformatted code.
Expand All @@ -407,7 +456,7 @@ fn ensure_unchanged_ast(
.expect("Unformatted code to be valid syntax");

let unformatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(unformatted_parsed.unsupported_syntax_errors());
collect_unsupported_syntax_errors(&unformatted_parsed);
let mut unformatted_ast = unformatted_parsed.into_syntax();

Normalizer.visit_module(&mut unformatted_ast);
Expand All @@ -422,29 +471,31 @@ fn ensure_unchanged_ast(

// Assert that there are no new unsupported syntax errors
let mut formatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(formatted_parsed.unsupported_syntax_errors());
collect_unsupported_syntax_errors(&formatted_parsed);

formatted_unsupported_syntax_errors
.retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint));

if !formatted_unsupported_syntax_errors.is_empty() {
let index = LineIndex::from_source_text(formatted_code);
panic!(
"Formatted code `{}` introduced new unsupported syntax errors:\n---\n{}\n---",
input_path.display(),
formatted_unsupported_syntax_errors
.into_values()
.map(|error| {
let location = index.line_column(error.start(), formatted_code);
format!(
"{row}:{col} {error}",
row = location.line,
col = location.column
)
})
.join("\n")
);
}
let file = SourceFileBuilder::new(
input_path.file_name().unwrap().to_string_lossy(),
formatted_code,
)
.finish();
let diagnostics = formatted_unsupported_syntax_errors
.values()
.map(|error| {
let mut diag = Diagnostic::new(DiagnosticId::InvalidSyntax, Severity::Error, error);
let span = Span::from(file.clone()).with_range(error.range());
diag.annotate(Annotation::primary(span));
let sub = SubDiagnostic::new(
SubDiagnosticSeverity::Warning,
"Only accept new syntax errors if they are also present in the input. \
The formatter should not introduce syntax errors.",
);
diag.sub(sub);
diag
})
.collect::<Vec<_>>();

let mut formatted_ast = formatted_parsed.into_syntax();
Normalizer.visit_module(&mut formatted_ast);
Expand All @@ -466,6 +517,8 @@ fn ensure_unchanged_ast(
input_path.display(),
);
}

diagnostics
}

struct Header<'a> {
Expand Down Expand Up @@ -537,22 +590,64 @@ source_type = {source_type:?}"#,
}
}

/// A visitor to collect a sequence of node IDs for fingerprinting [`UnsupportedSyntaxError`]s.
///
/// It visits each statement in the AST in source order and saves its range. The index of the node
/// enclosing a syntax error's range can then be retrieved with the `node_id` method. This `node_id`
/// should be stable across formatting runs since the formatter won't add or remove statements.
struct StmtVisitor {
nodes: Vec<TextRange>,
}

impl StmtVisitor {
fn new(parsed: &Parsed<Mod>) -> Self {
let mut visitor = Self { nodes: Vec::new() };
visitor.visit_mod(parsed.syntax());
visitor
}

/// Return the index of the statement node that contains `range`.
fn node_id(&self, range: TextRange) -> usize {
self.nodes
.iter()
.enumerate()
.filter(|(_, node)| node.contains_range(range))
.min_by_key(|(_, node)| node.len())
.expect("Expected an enclosing node in the AST")
.0
Comment on lines +611 to +617
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with this as I don't think it matters much. But if you're interested. Here's an example using partition_point to find the first node that starts at or before a given offset.

let start = self
.raw
.partition_point(|comment| comment.start() < range.start());

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay, thank you! I found the examples above that using binary_search_by, but partition_point is good to know about.

I think I'll just leave it as-is for now if it doesn't matter too much.

}
}

impl<'a> SourceOrderVisitor<'a> for StmtVisitor {
fn visit_stmt(&mut self, stmt: &'a ruff_python_ast::Stmt) {
self.nodes.push(stmt.range());
ruff_python_ast::visitor::source_order::walk_stmt(self, stmt);
}
}

/// Collects the unsupported syntax errors and assigns a unique hash to each error.
fn collect_unsupported_syntax_errors(
errors: &[UnsupportedSyntaxError],
parsed: &Parsed<Mod>,
) -> FxHashMap<u64, UnsupportedSyntaxError> {
let mut collected = FxHashMap::default();

for error in errors {
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, 0);
if parsed.unsupported_syntax_errors().is_empty() {
return collected;
}

let visitor = StmtVisitor::new(parsed);

for error in parsed.unsupported_syntax_errors() {
let node_id = visitor.node_id(error.range);
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, node_id, 0);

// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
loop {
match collected.entry(error_fingerprint) {
Entry::Occupied(_) => {
error_fingerprint =
fingerprint_unsupported_syntax_error(error, error_fingerprint);
fingerprint_unsupported_syntax_error(error, node_id, error_fingerprint);
}
Entry::Vacant(entry) => {
entry.insert(error.clone());
Expand All @@ -565,7 +660,11 @@ fn collect_unsupported_syntax_errors(
collected
}

fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u64) -> u64 {
fn fingerprint_unsupported_syntax_error(
error: &UnsupportedSyntaxError,
node_id: usize,
salt: u64,
) -> u64 {
let mut hasher = DefaultHasher::new();

let UnsupportedSyntaxError {
Expand All @@ -579,6 +678,7 @@ fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u6
salt.hash(&mut hasher);
kind.hash(&mut hasher);
target_version.hash(&mut hasher);
node_id.hash(&mut hasher);

hasher.finish()
}
Loading