Skip to content

Commit d4ef8d6

Browse files
committed
[ty] Use an interval map for scopes by expression
1 parent d078951 commit d4ef8d6

File tree

4 files changed

+158
-22
lines changed

4 files changed

+158
-22
lines changed

crates/ruff_python_ast/src/node_index.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ impl NodeIndex {
4848
pub fn as_usize(self) -> usize {
4949
self.0 as _
5050
}
51+
52+
pub fn as_u32(self) -> u32 {
53+
self.0
54+
}
55+
}
56+
57+
impl From<u32> for NodeIndex {
58+
fn from(value: u32) -> Self {
59+
NodeIndex(value)
60+
}
5161
}
5262

5363
impl From<u32> for AtomicNodeIndex {

crates/ty_python_semantic/src/semantic_index.rs

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use ruff_db::files::File;
55
use ruff_db::parsed::parsed_module;
66
use ruff_index::{IndexSlice, IndexVec};
77

8+
use ruff_python_ast::NodeIndex;
89
use ruff_python_parser::semantic_errors::SemanticSyntaxError;
910
use rustc_hash::{FxHashMap, FxHashSet};
1011
use salsa::Update;
@@ -24,6 +25,7 @@ use crate::semantic_index::place::{
2425
ScopeKind, ScopedPlaceId,
2526
};
2627
use crate::semantic_index::use_def::{EagerSnapshotKey, ScopedEagerSnapshotId, UseDefMap};
28+
use crate::semantic_model::HasTrackedScope;
2729
use crate::util::get_size::untracked_arc_size;
2830

2931
pub mod ast_ids;
@@ -179,7 +181,7 @@ pub(crate) struct SemanticIndex<'db> {
179181
scopes: IndexVec<FileScopeId, Scope>,
180182

181183
/// Map expressions to their corresponding scope.
182-
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
184+
scopes_by_expression: ExpressionsScopeMap,
183185

184186
/// Map from a node creating a definition to its definition.
185187
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
@@ -247,25 +249,26 @@ impl<'db> SemanticIndex<'db> {
247249

248250
/// Returns the ID of the `expression`'s enclosing scope.
249251
#[track_caller]
250-
pub(crate) fn expression_scope_id(
251-
&self,
252-
expression: impl Into<ExpressionNodeKey>,
253-
) -> FileScopeId {
254-
self.scopes_by_expression[&expression.into()]
252+
pub(crate) fn expression_scope_id<E>(&self, expression: &E) -> FileScopeId
253+
where
254+
E: HasTrackedScope,
255+
{
256+
self.try_expression_scope_id(expression)
257+
.expect("Expression to be part of a scope if it is from the same module")
255258
}
256259

257260
/// Returns the ID of the `expression`'s enclosing scope.
258-
pub(crate) fn try_expression_scope_id(
259-
&self,
260-
expression: impl Into<ExpressionNodeKey>,
261-
) -> Option<FileScopeId> {
262-
self.scopes_by_expression.get(&expression.into()).copied()
261+
pub(crate) fn try_expression_scope_id<E>(&self, expression: &E) -> Option<FileScopeId>
262+
where
263+
E: HasTrackedScope,
264+
{
265+
self.scopes_by_expression.try_get(expression)
263266
}
264267

265268
/// Returns the [`Scope`] of the `expression`'s enclosing scope.
266269
#[allow(unused)]
267270
#[track_caller]
268-
pub(crate) fn expression_scope(&self, expression: impl Into<ExpressionNodeKey>) -> &Scope {
271+
pub(crate) fn expression_scope(&self, expression: &impl HasTrackedScope) -> &Scope {
269272
&self.scopes[self.expression_scope_id(expression)]
270273
}
271274

@@ -583,6 +586,40 @@ impl<'a> Iterator for ChildrenIter<'a> {
583586

584587
impl FusedIterator for ChildrenIter<'_> {}
585588

589+
/// Interval map that maps a range of expression node ids to their corresponding scopes.
590+
///
591+
/// Lookups require `O(log n)` time, where `n` is roughly the number of scopes (roughly
592+
/// becaues sub-scopes can be interleaved with expressions in the outer scope, e.g. function, some statements, a function).
593+
#[derive(Eq, PartialEq, Debug, get_size2::GetSize)]
594+
struct ExpressionsScopeMap {
595+
scopes_by_expression: Box<[(std::ops::Range<NodeIndex>, FileScopeId)]>,
596+
}
597+
598+
impl ExpressionsScopeMap {
599+
fn try_get<E>(&self, node: &E) -> Option<FileScopeId>
600+
where
601+
E: HasTrackedScope,
602+
{
603+
let node_index = node.node_index().load();
604+
605+
let entry = self
606+
.scopes_by_expression
607+
.binary_search_by_key(&node_index, |(range, _)| range.start);
608+
609+
let index = match entry {
610+
Ok(index) => index,
611+
Err(index) => index.checked_sub(1)?,
612+
};
613+
614+
let (range, scope_id) = &self.scopes_by_expression[index];
615+
if range.contains(&node_index) {
616+
Some(*scope_id)
617+
} else {
618+
None
619+
}
620+
}
621+
}
622+
586623
#[cfg(test)]
587624
mod tests {
588625
use ruff_db::files::{File, system_path_to_file};

crates/ty_python_semantic/src/semantic_index/builder.rs

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::cell::{OnceCell, RefCell};
22
use std::sync::Arc;
33

44
use except_handlers::TryNodeContextStackManager;
5+
use itertools::Itertools;
56
use rustc_hash::{FxHashMap, FxHashSet};
67

78
use ruff_db::files::File;
@@ -10,7 +11,7 @@ use ruff_db::source::{SourceText, source_text};
1011
use ruff_index::IndexVec;
1112
use ruff_python_ast::name::Name;
1213
use ruff_python_ast::visitor::{Visitor, walk_expr, walk_pattern, walk_stmt};
13-
use ruff_python_ast::{self as ast, PySourceType, PythonVersion};
14+
use ruff_python_ast::{self as ast, NodeIndex, PySourceType, PythonVersion};
1415
use ruff_python_parser::semantic_errors::{
1516
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
1617
};
@@ -45,7 +46,8 @@ use crate::semantic_index::reachability_constraints::{
4546
use crate::semantic_index::use_def::{
4647
EagerSnapshotKey, FlowSnapshot, ScopedEagerSnapshotId, UseDefMapBuilder,
4748
};
48-
use crate::semantic_index::{ArcUseDefMap, SemanticIndex};
49+
use crate::semantic_index::{ArcUseDefMap, ExpressionsScopeMap, SemanticIndex};
50+
use crate::semantic_model::HasTrackedScope;
4951
use crate::unpack::{Unpack, UnpackKind, UnpackPosition, UnpackValue};
5052
use crate::{Db, Program};
5153

@@ -102,7 +104,7 @@ pub(super) struct SemanticIndexBuilder<'db, 'ast> {
102104
ast_ids: IndexVec<FileScopeId, AstIdsBuilder>,
103105
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
104106
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
105-
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
107+
scopes_by_expression: ExpressionsScopeMapBuilder,
106108
globals_by_scope: FxHashMap<FileScopeId, FxHashSet<ScopedPlaceId>>,
107109
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
108110
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
@@ -137,7 +139,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
137139
scope_ids_by_scope: IndexVec::new(),
138140
use_def_maps: IndexVec::new(),
139141

140-
scopes_by_expression: FxHashMap::default(),
142+
scopes_by_expression: ExpressionsScopeMapBuilder::new(),
141143
scopes_by_node: FxHashMap::default(),
142144
definitions_by_node: FxHashMap::default(),
143145
expressions_by_node: FxHashMap::default(),
@@ -1011,7 +1013,6 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
10111013
place_tables.shrink_to_fit();
10121014
use_def_maps.shrink_to_fit();
10131015
ast_ids.shrink_to_fit();
1014-
self.scopes_by_expression.shrink_to_fit();
10151016
self.definitions_by_node.shrink_to_fit();
10161017

10171018
self.scope_ids_by_scope.shrink_to_fit();
@@ -1028,7 +1029,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
10281029
scope_ids_by_scope: self.scope_ids_by_scope,
10291030
globals_by_scope: self.globals_by_scope,
10301031
ast_ids,
1031-
scopes_by_expression: self.scopes_by_expression,
1032+
scopes_by_expression: self.scopes_by_expression.build(),
10321033
scopes_by_node: self.scopes_by_node,
10331034
use_def_maps,
10341035
imported_modules: Arc::new(self.imported_modules),
@@ -1901,7 +1902,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
19011902
self.with_semantic_checker(|semantic, context| semantic.visit_expr(expr, context));
19021903

19031904
self.scopes_by_expression
1904-
.insert(expr.into(), self.current_scope());
1905+
.record_expression(expr, self.current_scope());
19051906

19061907
let node_key = NodeKey::from_node(expr);
19071908

@@ -2539,3 +2540,79 @@ fn dunder_all_extend_argument(value: &ast::Expr) -> Option<&ast::Expr> {
25392540

25402541
(attr == "__all__").then_some(value)
25412542
}
2543+
2544+
/// Builds a map of expressions to their enclosing scopes.
2545+
///
2546+
/// Ideally, we would condense the expression node ids to scoope index directly when
2547+
/// registering the expression. However, there are a few things that make this hard (but maybe still possible?)
2548+
///
2549+
/// * The expression ids are assigned in source order, but we visit the expressions in semantic order.
2550+
/// There's no difference for most expressions but some expressions will be registered out of order.
2551+
/// * The expression ids aren't always consecutive elements (in fact, they normally aren't). That makes it
2552+
/// harder to detect out of order expressions.
2553+
struct ExpressionsScopeMapBuilder {
2554+
expression_and_scope: Vec<(NodeIndex, FileScopeId)>,
2555+
out_of_order: Vec<(NodeIndex, FileScopeId)>,
2556+
}
2557+
2558+
impl ExpressionsScopeMapBuilder {
2559+
fn new() -> Self {
2560+
Self {
2561+
expression_and_scope: vec![],
2562+
out_of_order: vec![],
2563+
}
2564+
}
2565+
2566+
// Ugh, the expression ranges can be off because they are in source order but we visit the
2567+
// expressions in semantic order.
2568+
fn record_expression(&mut self, expression: &impl HasTrackedScope, scope: FileScopeId) {
2569+
let expression_index = expression.node_index().load();
2570+
2571+
if self
2572+
.expression_and_scope
2573+
.last()
2574+
.is_some_and(|last| last.0 > expression_index)
2575+
{
2576+
self.out_of_order.push((expression_index, scope));
2577+
} else {
2578+
self.expression_and_scope.push((expression_index, scope));
2579+
}
2580+
}
2581+
2582+
fn build(mut self) -> ExpressionsScopeMap {
2583+
self.out_of_order.sort_by_key(|(index, _)| *index);
2584+
2585+
let mut condensed = Vec::new();
2586+
2587+
let mut iter = self
2588+
.expression_and_scope
2589+
.into_iter()
2590+
.merge_by(self.out_of_order, |left, right| left.0 <= right.0);
2591+
2592+
let Some(first) = iter.next() else {
2593+
return ExpressionsScopeMap {
2594+
scopes_by_expression: Box::default(),
2595+
};
2596+
};
2597+
2598+
let mut current_scope = first.1;
2599+
let mut range = first.0..NodeIndex::from(first.0.as_u32() + 1);
2600+
2601+
for (index, scope) in iter {
2602+
if scope == current_scope {
2603+
range.end = NodeIndex::from(index.as_u32() + 1);
2604+
continue;
2605+
}
2606+
2607+
condensed.push((range, current_scope));
2608+
current_scope = scope;
2609+
range = index..NodeIndex::from(index.as_u32() + 1);
2610+
}
2611+
2612+
condensed.push((range, current_scope));
2613+
2614+
ExpressionsScopeMap {
2615+
scopes_by_expression: condensed.into_boxed_slice(),
2616+
}
2617+
}
2618+
}

crates/ty_python_semantic/src/semantic_model.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use ruff_db::files::{File, FilePath};
22
use ruff_db::source::line_index;
33
use ruff_python_ast as ast;
4-
use ruff_python_ast::{Expr, ExprRef, name::Name};
4+
use ruff_python_ast::{Expr, ExprRef, HasNodeIndex, name::Name};
55
use ruff_source_file::LineIndex;
66

77
use crate::Db;
@@ -108,7 +108,7 @@ impl<'db> SemanticModel<'db> {
108108
// expression that we're in, then just
109109
// fall back to the global scope.
110110
None => Some(FileScopeId::global()),
111-
Some(expr) => index.try_expression_scope_id(expr),
111+
Some(expr) => index.try_expression_scope_id(&expr),
112112
},
113113
}) else {
114114
return vec![];
@@ -155,7 +155,7 @@ pub trait HasType {
155155
impl HasType for ast::ExprRef<'_> {
156156
fn inferred_type<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
157157
let index = semantic_index(model.db, model.file);
158-
let file_scope = index.expression_scope_id(*self);
158+
let file_scope = index.expression_scope_id(self);
159159
let scope = file_scope.to_scope_id(model.db, model.file);
160160

161161
infer_scope_types(model.db, scope).expression_type(*self)
@@ -277,6 +277,18 @@ impl HasType for ast::Alias {
277277
}
278278
}
279279

280+
/// Implemented by types for which the semantic index tracks their scope.
281+
pub(crate) trait HasTrackedScope: HasNodeIndex {}
282+
283+
impl HasTrackedScope for ast::Expr {}
284+
285+
impl HasTrackedScope for ast::ExprRef<'_> {}
286+
impl HasTrackedScope for &ast::ExprRef<'_> {}
287+
288+
// See https://github.com/astral-sh/ty/issues/572 why this implementation exists
289+
// even when we never register identifiers during semantic index building.
290+
impl HasTrackedScope for ast::Identifier {}
291+
280292
#[cfg(test)]
281293
mod tests {
282294
use ruff_db::files::system_path_to_file;

0 commit comments

Comments
 (0)